includes:
  in_header: preamble.tex
  
library(ggplot2)
library(plm)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(sandwich)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plm':
## 
##     between, lag, lead
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
library(writexl)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(readr)
library(nlme)
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
library(stringr)
library(broom)
library(tidyr)
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
library(PerformanceAnalytics)
## Loading required package: xts
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
library(Metrics)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:readr':
## 
##     col_factor
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(glue)

EFPIA/ Europe

EFPIA R&D spending by country by year in euros

From yearly R&D reports 2004: Link Removed. Screenshot can be found in original data folder

2005: https://www.efpia.eu/media/15485/the-pharmaceutical-industry-in-figures-edition-2007.pdf

2006: https://www.efpia.eu/media/15486/the-pharmaceutical-industry-in-figures-edition-2008.pdf

2007: https://www.pharmine.eu/wp-content/uploads/2014/04/The_Pharma_Industry_in_figures_Edition_2009-20080612-009-EN-v1.pdf

2008: https://www.sfee.gr/wp-content/uploads/2015/05/Figures_2010_Final-20100611-001-EN-v1_0.pdf

2009: https://www.efpia.eu/media/15489/the-pharmaceutical-industry-in-figures-edition-2011.pdf

2010: https://www.sfee.gr/wp-content/uploads/2015/05/EFPIA_Figures_2012_Final.pdf

Data for 2011 could not be found

2012: https://www.phar-in.eu/wp-content/uploads/2014/05/Figures_2014_Final.pdf

2013: https://www.efpia.eu/media/25822/2015-the-pharmaceutical-industry-in-figures.pdf

2014: https://www.efpia.eu/media/25055/the-pharmaceutical-industry-in-figures-june-2016.pdf

2015: https://www.efpia.eu/media/219735/efpia-pharmafigures2017_statisticbroch_v04-final.pdf

2016: https://www.efpia.eu/media/361960/efpia-pharmafigures2018_v07-hq.pdf

2017: https://www.efpia.eu/media/412931/the-pharmaceutical-industry-in-figures-2019.pdf

2018: https://www.efpia.eu/media/554521/efpia_pharmafigures_2020_web.pdf

2019: https://www.efpia.eu/media/602709/the-pharmaceutical-industry-in-figures-2021.pdf

2020: https://www.efpia.eu/media/637143/the-pharmaceutical-industry-in-figures-2022.pdf

2021: https://www.efpia.eu/media/rm4kzdlx/the-pharmaceutical-industry-in-figures-2023.pdf

These can also be found in the the Original Data Folder

efpia_rd <- read.csv("Cleaned Data/Area Level R&D Spending/EFPIA R&D Spending by Country in EU.csv")
efpia_rd <- clean_names(efpia_rd)
efpia_rd

Formatting - removing commas, making dollars numeric

efpia_rd$r_d_spending_millions_euros <- gsub(",", "", efpia_rd$r_d_spending_millions_euros)
efpia_rd$r_d_spending_millions_euros <- as.numeric(efpia_rd$r_d_spending_millions_euros)
## Warning: NAs introduced by coercion
efpia_rd

Sum EU R&D spending by year over countries to get total spending

efpia_rd_totals <- efpia_rd %>%
  group_by(year) %>%
  summarise(total_mil_euros = sum(r_d_spending_millions_euros, na.rm = TRUE))
efpia_rd_totals

Imputing 2011 with average of 2011 and 2012 efpia_rd_totals

rd_2012 <- efpia_rd_totals[efpia_rd_totals$year == 2012, ]$total_mil_euros
rd_2010 <- efpia_rd_totals[efpia_rd_totals$year == 2010, ]$total_mil_euros
avg_2010_2012 <- (rd_2010 + rd_2012) / 2

efpia_rd_totals <- rbind(efpia_rd_totals, c(2011, avg_2010_2012))
efpia_rd_totals <- efpia_rd_totals[order(efpia_rd_totals$year),]
efpia_rd_totals

Euros to Dollars https://www.macrotrends.net/2548/euro-dollar-exchange-rate-historical-chart

eurotousdavgconversion <- read_xlsx("Original Data/Euro to USD Conversion Avg by Year.xlsx")
eurotousdavgconversion
eurotodollars <- as.data.frame(cbind(eurotousdavgconversion$Year, eurotousdavgconversion$`Average Closing Price`))
colnames(eurotodollars) <- c("year", "euros_to_dollar_conversion")
eurotodollars

Multiply euros by conversion rate to get r&d spending in dollars, transform to billions of dollars

rd_conversion_merged_df <- merge(efpia_rd_totals, eurotodollars, by = "year")
rd_conversion_merged_df
efpia_rd_totals$total_rd_mil_dollars <- rd_conversion_merged_df$euros_to_dollar_conversion * rd_conversion_merged_df$total_mil_euros
efpia_rd_totals$annual_domestic_RD_spending_bil_dollars <- efpia_rd_totals$total_rd_mil_dollars /1000

efpia_rd_totals

EU orphan drug market authorizations

EU Orphan Drugs were scraped from the pdf: https://www.orpha.net/pdfs/orphacom/cahiers/docs/GB/list_of_orphan_drugs_in_europe.pdf It can also be found in the the Original Data Folder

Combines lists:

  • List of orphan medicinal products in Europe with European orphan designation and European marketing authorization

  • Annex 1: Orphan medicinal products withdrawn from the European Community Register of orphan medicinal products

  • Annex 2: Orphan medicinal products withdrawn from use in the European Union

eu_orphan_drugs <- read_xlsx("Cleaned Data/Area Level Orphan Authorizations/EU_Orphan_w_Company from https___www.orpha.net_orphacom_cahiers_docs_GB_list_of_orphan_drugs_in_europe.xlsx")
eu_orphan_drugs <- clean_names(eu_orphan_drugs)
eu_orphan_drugs <- subset(eu_orphan_drugs, market_authorization_year < 2022)

eu_orphan_drugs

Check for duplicated rows

eu_orphan_drugs[duplicated(eu_orphan_drugs),]

Number of MAs per company in EU - note companies may go by multiple names

eu_company_freq <- eu_orphan_drugs %>%
  count(firm) %>%
  arrange(desc(n))
colnames(eu_company_freq) <- c("Firm", "Market Authorization Count")
eu_company_freq

EU number of market authorizations for each drug

eu_orphan_drugs$tradename <- tolower(eu_orphan_drugs$tradename)

eu_ma_freq <- eu_orphan_drugs %>%
  count(tradename) %>%
  arrange(desc(n))

colnames(eu_ma_freq) <- c("Trade Name", "Market Authorizations")
eu_ma_freq

How many ODs with more than 1 authorization in EU

nrow(eu_ma_freq[eu_ma_freq$`Market Authorizations` > 1, ])
## [1] 9
eu_ma_freq$`Trade Name` <- str_to_title(eu_ma_freq$`Trade Name`)

eu_ma_freq[eu_ma_freq$`Market Authorizations` > 1, ]

Export to table

stargazer(eu_ma_freq[1:9,], title= "EU Market Authorization Counts for Orphan Drugs with Over 1 Market Authorization", rownames = FALSE, align=TRUE, summary=FALSE, type = "html", out="Final Figures Formatted/EU_OD_Multiple_MAs.htm")
## 
## <table style="text-align:center"><caption><strong>EU Market Authorization Counts for Orphan Drugs with Over 1 Market Authorization</strong></caption>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td>Trade Name</td><td>Market Authorizations</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td>Glivec</td><td>6</td></tr>
## <tr><td>Nexavar</td><td>3</td></tr>
## <tr><td>Revlimid</td><td>3</td></tr>
## <tr><td>Carbaglu</td><td>2</td></tr>
## <tr><td>Soliris</td><td>2</td></tr>
## <tr><td>Torisel</td><td>2</td></tr>
## <tr><td>Tracleer</td><td>2</td></tr>
## <tr><td>Yondelis</td><td>2</td></tr>
## <tr><td>Zavesca</td><td>2</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr></table>

Look at some instances where there are multiple market authorizations for the same drug

eu_orphan_drugs[eu_orphan_drugs$tradename == "glivec",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "nexavar",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "revlimid",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "carbaglu",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "soliris",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "torisel",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "tracleer",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "yondelis",]
eu_orphan_drugs[eu_orphan_drugs$tradename == "zavesca",]

Select only new drugs in the EU

eu_orphan_drugs_new <- eu_orphan_drugs %>%
  group_by(tradename) %>%  
  slice_min(order_by = market_authorization_year, n = 1, with_ties = FALSE) %>%  # Select the entry with the earliest market authorization year
  ungroup() 

eu_orphan_drugs_new

Count new EU orphan drug market authorizations by year

eu_new_od_ma_freqs <- as.data.frame(table(eu_orphan_drugs_new$market_authorization_year))
eu_new_od_ma_freqs
colnames(eu_new_od_ma_freqs) <- c("Year", "Freq New Orphan Drug MAs")
eu_new_od_ma_freqs
eu_new_od_ma_freqs$Year <- as.character(eu_new_od_ma_freqs$Year)
eu_new_od_ma_freqs$Year <- as.numeric(eu_new_od_ma_freqs$Year)
eu_new_od_ma_freqs <- eu_new_od_ma_freqs[order(-eu_new_od_ma_freqs$Year),] 
eu_new_od_ma_freqs

Select all MAs in the EU

Count EU market authorizations by year

eu_od_ma_freqs <- as.data.frame(table(eu_orphan_drugs$market_authorization_year))
colnames(eu_od_ma_freqs) <- c("Year", "Freq Orphan Drug MAs")
eu_od_ma_freqs
eu_od_ma_freqs$Year <- as.character(eu_od_ma_freqs$Year)
eu_od_ma_freqs$Year <- as.numeric(eu_od_ma_freqs$Year)
eu_od_ma_freqs <- eu_od_ma_freqs[order(-eu_od_ma_freqs$Year),] 
head(eu_od_ma_freqs)
eu_od_ma_freqs <- merge(eu_od_ma_freqs, eu_new_od_ma_freqs, by = "Year")
eu_od_ma_freqs

Graph of EU Market Authorized orphan drugs

eu_od_ma_freqs <- eu_od_ma_freqs <- eu_od_ma_freqs[eu_od_ma_freqs$Year > 2003,]

ggplot(eu_od_ma_freqs, aes(x = Year)) +
  geom_line(aes(y = `Freq Orphan Drug MAs`, color="All Orphan Drug MAs")) +
  geom_line(aes(y = `Freq New Orphan Drug MAs`, color="MAs for New Orphan Drugs")) +
  labs(x = "Year",
       y = "Number of Market Authorizations in the EU",
       title="Annual Number of Orphan Drug Market Authorizations in the EU"
       ) +
  scale_color_manual(name = " ", values = c("lightskyblue3", "darkred"))+
  theme_minimal()

Merge orphan drug and R&D spending tables for EU

colnames(efpia_rd_totals) <- c("Year", "total_rd_euros", "total_rd_mil_dollars" , "total_rd_bil_dollars")
eu_rd_mas <- merge(efpia_rd_totals,eu_od_ma_freqs,by="Year")
eu_rd_mas <- clean_names(eu_rd_mas)
eu_rd_mas

PhRMA/ United States

PhRMA R&D spending by year

https://phrma.org/-/media/Project/PhRMA/PhRMA-Org/PhRMA-Refresh/Report-PDFs/P-R/PhRMA_membership-survey_2022_final.pdf

phrma_us_rd <- read.csv("Cleaned Data/Area Level R&D Spending/US Domestic R&D and R&D Abroad,* PhRMA Member Companies- 1983–2021 (dollar figures in millions) .csv")
head(phrma_us_rd,20)

US orphan drug Market Authorizations

https://www.accessdata.fda.gov/scripts/opdlisting/oopd/ Only approved products, accessed in 2023

us_orphan_drugs <- read.csv("Original Data/Area Level Orphan Authorizations/US_FDA_OD.csv")

us_orphan_drugs <- clean_names(us_orphan_drugs)
us_orphan_drugs <- us_orphan_drugs[!is.na(us_orphan_drugs$marketing_approval_date), ] 
us_orphan_drugs
tail(us_orphan_drugs, n=5)
us_orphan_drugs <- head(us_orphan_drugs[order(us_orphan_drugs$marketing_approval_date),],-2) #notes removed at bottom of df

us_orphan_drugs$marketing_approval_date <- as.Date(us_orphan_drugs$marketing_approval_date, format="%m/%d/%y")

us_orphan_drugs <- us_orphan_drugs[order(us_orphan_drugs$marketing_approval_date),]
us_orphan_drugs$market_authorization_year <- year(us_orphan_drugs$marketing_approval_date)
us_orphan_drugs <- subset(us_orphan_drugs, market_authorization_year < 2022)

us_orphan_drugs

Check for duplicated rows. Remove one row because duplicated

us_orphan_drugs[duplicated(us_orphan_drugs),]                     # see if rows are duplicated
us_orphan_drugs[us_orphan_drugs$generic_name == "tacrolimus" ,]   # view the duplication
us_orphan_drugs[710, ]                                            # make sure we are removing correct row
us_orphan_drugs <- us_orphan_drugs[-710, ]                        # remove duplicated row
us_orphan_drugs[duplicated(us_orphan_drugs),]                     # ensure we removed duplicated row

US Number of MAs per company - note companies go by multiple names ex. Novartis Pharmaceuticals Corporation/ Novartis Pharmaceuticals Corp.

us_company_freq <- us_orphan_drugs %>%
  count(sponsor_company) %>%
  arrange(desc(n))
colnames(us_company_freq) <- c("Sponsor Company", "Market Authorizations")
us_company_freq
us_orphan_drugs

Where Trade Name is Null, replace with generic name

us_orphan_drugs[us_orphan_drugs$trade_name == "" ,]
us_orphan_drugs[us_orphan_drugs$trade_name == "" ,]$trade_name = us_orphan_drugs[us_orphan_drugs$trade_name == "" ,]$generic_name
us_orphan_drugs$trade_name = tolower(us_orphan_drugs$trade_name)

US Number of market authorizations per drug

us_ma_freq <- us_orphan_drugs %>%
  count(trade_name) %>%
  arrange(desc(n))
colnames(us_ma_freq) <- c("Trade Name", "Market Authorizations")

us_ma_freq$`Trade Name` <- str_to_title(us_ma_freq$`Trade Name`)

us_ma_freq

How many ODs with more than 1 authorization in US

nrow(us_ma_freq[us_ma_freq$`Market Authorizations` > 1, ])
## [1] 181

How many ODs with more than 5 authorizations in US

nrow(us_ma_freq[us_ma_freq$`Market Authorizations` > 5, ])
## [1] 12
us_ma_freq[us_ma_freq$`Market Authorizations` > 5, ]

Export to table

stargazer(us_ma_freq[1:12,], title= "US Market Authorization Counts for Orphan Drugs with Over 5 Market Authorizations", rownames = FALSE, align=TRUE, summary=FALSE, type = "html", out="Final Figures Formatted/US_OD_Multiple_MAs.htm")
## 
## <table style="text-align:center"><caption><strong>US Market Authorization Counts for Orphan Drugs with Over 5 Market Authorizations</strong></caption>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td>Trade Name</td><td>Market Authorizations</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td>Keytruda</td><td>13</td></tr>
## <tr><td>Avastin</td><td>11</td></tr>
## <tr><td>Lynparza</td><td>11</td></tr>
## <tr><td>Imbruvica</td><td>10</td></tr>
## <tr><td>Kalydeco</td><td>10</td></tr>
## <tr><td>Opdivo</td><td>10</td></tr>
## <tr><td>Gleevec</td><td>9</td></tr>
## <tr><td>Revlimid</td><td>9</td></tr>
## <tr><td>Adcetris</td><td>7</td></tr>
## <tr><td>Darzalex</td><td>7</td></tr>
## <tr><td>Humira</td><td>7</td></tr>
## <tr><td>Novoseven</td><td>6</td></tr>
## <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr></table>

Look at some instances where there are multiple market authorizations for the same drug

us_orphan_drugs[us_orphan_drugs$trade_name == "keytruda",]
us_orphan_drugs[us_orphan_drugs$trade_name == "avastin",]
us_orphan_drugs[us_orphan_drugs$trade_name == "lynparza",]

Select only new drugs in the US

us_orphan_drugs_new <- us_orphan_drugs %>%
  group_by(trade_name) %>%  
  slice_min(order_by = marketing_approval_date, n = 1, with_ties = FALSE) %>%  # Select the entry with the earliest market authorization year
  ungroup() 

us_orphan_drugs_new
us_orphan_drugs_new$Marketing.year <- year(as.Date(us_orphan_drugs_new$marketing_approval_date)) 
us_new_od_ma_freqs <- as.data.frame(table(us_orphan_drugs_new$Marketing.year))
colnames(us_new_od_ma_freqs) <- c("Year", "Freq New Orphan Drug MAs")
us_new_od_ma_freqs

Select all MAs in the US. Format US MA date as year

us_orphan_drugs$Marketing.year <- year(as.Date(us_orphan_drugs$marketing_approval_date)) 
us_od_ma_freqs <- as.data.frame(table(us_orphan_drugs$Marketing.year))
colnames(us_od_ma_freqs) <- c("Year", "Freq Orphan Drug MAs")

us_od_ma_freqs$Year <- as.character(us_od_ma_freqs$Year)
us_od_ma_freqs$Year <- as.numeric(us_od_ma_freqs$Year)
us_od_ma_freqs <- us_od_ma_freqs[order(-us_od_ma_freqs$Year),] 

us_od_ma_freqs <- merge(us_od_ma_freqs, us_new_od_ma_freqs, by = "Year")

us_od_ma_freqs

Graph of US Market Authorizations orphan drugs

us_od_ma_freqs <- us_od_ma_freqs[us_od_ma_freqs$Year > 2003,]
us_od_ma_freqs <- us_od_ma_freqs[us_od_ma_freqs$Year < 2022,]

ggplot(us_od_ma_freqs, aes(x = Year)) +
  geom_line(aes(y = `Freq Orphan Drug MAs`, color="All Orphan Drug MAs")) +
  geom_line(aes(y = `Freq New Orphan Drug MAs`, color="MAs for New Orphan Drugs")) +
  labs(x = "Year",
       y = str_wrap("Number of Market Authorizations in the US", width=60),
       title=str_wrap("Annual Number of Orphan Drug Market Authorizations in the US", width=60), 
       ) +
  scale_color_manual(name = " ", values = c("lightskyblue3", "darkred"))+
  theme_minimal()

Merge US orphan drug df with PhRMA r&d df

#phrma_us_rd <- phrma_us_rd[-dim(phrma_us_rd)[1],] # last row is NA
us_rd_mas <- merge(phrma_us_rd,us_od_ma_freqs,by="Year")
us_rd_mas

Format

us_rd_mas$Domestic.R.D. <- parse_number(us_rd_mas$Domestic.R.D.) #dollars are in millions
us_rd_mas <- clean_names(us_rd_mas)
us_rd_mas

US dataframe simplified

us_rd_mas$annual_domestic_RD_spending_bil_dollars <- us_rd_mas$domestic_r_d /1000 #convert from millions of dollars to billions
us_rd_mas_simplified <- as.data.frame(cbind(us_rd_mas$year, us_rd_mas$annual_domestic_RD_spending_bil_dollars, us_rd_mas$freq_orphan_drug_m_as, us_rd_mas$freq_new_orphan_drug_m_as))

colnames(us_rd_mas_simplified) <- c("year", "annual_domestic_RD_spending_bil_dollars", "freq_orphan_drug_mas", "freq_new_orphan_drug_mas")

us_rd_mas_simplified$area <- rep("US", nrow(us_rd_mas_simplified)) # write US indicator in area column

us_rd_mas_simplified$lagged_annual_domestic_RD_spending_bil_dollars <- lag(us_rd_mas_simplified$annual_domestic_RD_spending_bil_dollars) # for lagged R&D spending - not used

us_rd_mas_simplified

EU dataframe simplified

eu_rd_mas_simplified <- as.data.frame(cbind(eu_rd_mas$year, eu_rd_mas$total_rd_bil_dollars, eu_rd_mas$freq_orphan_drug_m_as, eu_rd_mas$freq_new_orphan_drug_m_as))

colnames(eu_rd_mas_simplified) <- c("year", "annual_domestic_RD_spending_bil_dollars", "freq_orphan_drug_mas", "freq_new_orphan_drug_mas")

eu_rd_mas_simplified$area <- rep("EU", nrow(eu_rd_mas_simplified)) # write EU indicator in area column

eu_rd_mas_simplified$lagged_annual_domestic_RD_spending_bil_dollars <- lag(eu_rd_mas_simplified$annual_domestic_RD_spending_bil_dollars) # for lagged R&D spending - not used

eu_rd_mas_simplified

EU GDP Per Capita In Billions of Dollars - data is in current US dollars. Data was pulled in 2023 however, only growth rates are used in our analysis

https://www.macrotrends.net/global-metrics/countries/EUU/european-union/gdp-per-capita

eu_gdp_percapita_annual <- read.csv("Original Data/GDP/european-union-gdp-per-capita-annual.csv", skip=16)
eu_gdp_percapita_annual$year <- year(as.Date(eu_gdp_percapita_annual$date))
eu_gdp_percapita_annual <- clean_names(eu_gdp_percapita_annual)
colnames(eu_gdp_percapita_annual)[2] <- "gdp_per_capita_usd"
colnames(eu_gdp_percapita_annual)[3] <- "gdp_per_capita_annual_growthrate_usd"
eu_gdp_percapita_annual

EU GDP (Billion USD) - not used in analysis

https://www.macrotrends.net/global-metrics/countries/EUU/european-union/gdp-gross-domestic-product EU GDP per capita is measured in billions of dollars and the growth rate is taken from those measurements

eu_gdp_annual <- read.csv("Original Data/GDP/european-union-gdp-gross-domestic-product.csv", skip=8)
eu_gdp_annual <- eu_gdp_annual[c(24:64),c(1:4)]
eu_gdp_annual$year <- year(as.Date(eu_gdp_annual$Date, format="%m/%d/%y"))
eu_gdp_annual
eu_gdp_annual <- clean_names(eu_gdp_annual)
colnames(eu_gdp_annual)[2] <- "gdp_billions_of_usd"

eu_gdp_annual

Merge GDP Per Capita, GDP Growth rate into same df

eu_rd_mas_simplified <- merge(eu_rd_mas_simplified, eu_gdp_percapita_annual[,c("year", "gdp_per_capita_annual_growthrate_usd")], by = "year", all.x = TRUE)
eu_rd_mas_simplified <- merge(eu_rd_mas_simplified, eu_gdp_annual[,c("year", "gdp_billions_of_usd")], by = "year", all.x = TRUE)

eu_rd_mas_simplified

US GDP Per Capita In Dollars - data is in current US dollars. Data was pulled in 2023 however, only growth rates are used in our analysis

https://www.macrotrends.net/global-metrics/countries/USA/united-states/gdp-per-capita

us_gdp_percapita_annual <- read.csv("Original Data/GDP/united-states-gdp-per-capita-annual.csv", skip=16)
us_gdp_percapita_annual$year <- year(as.Date(us_gdp_percapita_annual$date))

us_gdp_percapita_annual <- clean_names(us_gdp_percapita_annual)
colnames(us_gdp_percapita_annual)[2] <- "gdp_per_capita_usd"
colnames(us_gdp_percapita_annual)[3] <- "gdp_per_capita_annual_growthrate_usd"

us_gdp_percapita_annual

US GDP (Billion USD) - not used in analysis

https://www.macrotrends.net/global-metrics/countries/usa/united-states/gdp-gross-domestic-product#:~:text=U.S.%20gdp%20for%202023%20was,a%200.92%25%20decline%20from%202019

us_gdp_annual <- read.csv("Original Data/GDP/united-states-gdp-gross-domestic-product.csv", skip=8)
us_gdp_annual <- us_gdp_annual[c(24:64),c(1:4)]
us_gdp_annual$year <- year(as.Date(us_gdp_annual$Date, format="%m/%d/%y"))

us_gdp_annual <- clean_names(us_gdp_annual)
colnames(us_gdp_annual)[2] <- "gdp_billions_of_usd"

us_gdp_annual

Merge GDP Per Capita, GDP Growth rate into same df

us_rd_mas_simplified <- merge(us_rd_mas_simplified, us_gdp_percapita_annual[,c("year", "gdp_per_capita_annual_growthrate_usd")], by = "year", all.x = TRUE)
us_rd_mas_simplified <- merge(us_rd_mas_simplified, us_gdp_annual[,c("year", "gdp_billions_of_usd")], by = "year", all.x = TRUE)
us_rd_mas_simplified

Limit dataframe to 2004 and later, make lagged R&D spending NA at year 2004 (although lagged spending not used in analysis)

combined_rd_ma_df <- rbind(eu_rd_mas_simplified, us_rd_mas_simplified)
combined_rd_ma_df <- combined_rd_ma_df %>%
  filter(year > 2003)
combined_rd_ma_df[combined_rd_ma_df$area =="US" & combined_rd_ma_df$year==2004,]$lagged_annual_domestic_RD_spending_bil_dollars <-  NA
combined_rd_ma_df

Data Visualizations

ggplot(combined_rd_ma_df, aes(x = annual_domestic_RD_spending_bil_dollars, y = freq_orphan_drug_mas, color = area)) +
  geom_point() +
  labs(x = "Annual Domestic RD Spending (Billions USD)",
       y = "Number of Orphan Drug Market Authorizations",
       color = "Region",
       title=str_wrap("Orphan Drug Market Authorizations by Annual Domestic R&D Spending in the EU and US", 60)) +
  scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
  theme_minimal()

ggplot(combined_rd_ma_df, aes(x = year , y = freq_orphan_drug_mas, color = area)) +
  geom_line() +
  labs(x = "Year",
       y = "Number of Orphan Drug Market Authorizations", width=30,
       color = "Region",
       title=str_wrap("Annual Number of Orphan Drug Market Authorizations in the EU and US", width=70)) +
    theme_minimal() +
    scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100'))

ggplot(combined_rd_ma_df, aes(x = year , y = freq_new_orphan_drug_mas, color = area)) +
  geom_line() +
  labs(x = "Year",
       y = str_wrap("Number of Market Authorizations for New Orphan Drugs", width=40),
       color = "Region",
       title=str_wrap("Annual Number of Market Authorizations for New Orphan Drugs in the EU and US", width=60)) +
    theme_minimal() +
    scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) 

ggplot(combined_rd_ma_df, aes(x = year, y = annual_domestic_RD_spending_bil_dollars, color = area)) +
  geom_line() +
  labs(x = "Year",
       y = "Domestic R&D Spending (Billions USD)",
       color = "Region",
       title="Annual Domestic R&D Spending in the EU and US") +
    scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
  theme_minimal()

ggplot(combined_rd_ma_df, aes(x = year, y = gdp_billions_of_usd, color = area)) +
  geom_line() +
  labs(x = "Year",
       y = "GDP (Billions USD)",
       color = "Region",
       title="GDP in the EU and US") +
    scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
  theme_minimal()

ggplot(combined_rd_ma_df, aes(x = year, y = gdp_per_capita_annual_growthrate_usd, color = area)) +
  geom_line() +
  labs(x = "Year",
       y = "GDP Per Capita on Last Day of Year (USD)",
       color = "Region",
       title="GDP Per Capita on Last Day of Year in the EU and US") +
    scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
  theme_minimal()

combined_rd_ma_df
ggplot(combined_rd_ma_df, aes(x = year, y = annual_domestic_RD_spending_bil_dollars/gdp_billions_of_usd, color = area)) +
  geom_line() +
  labs(x = "Year",
       y = "Domestic R&D Spending / GDP",
       color = "Region",
       title=str_wrap("Annual Domestic Pharmaceutical R&D Spending as a Fraction of GDP in the EU and US", width=50))  +
    scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
  theme_minimal()

ggplot(combined_rd_ma_df, aes(x = annual_domestic_RD_spending_bil_dollars, y = freq_orphan_drug_mas, color = area)) +
  geom_point() +
  labs(x = "Domestic R&D Spending (Billions USD)",
       y = "Number of Orphan Drug Market Authorizations",
       color = "Region",
       title=str_wrap("Annual Number of Orphan Drug Market Authorizations by Domestic R&D Spending in the EU and US", width=50)) +
    scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
  theme_minimal()

ggplot(combined_rd_ma_df, aes(x = annual_domestic_RD_spending_bil_dollars, y = freq_new_orphan_drug_mas, color = area)) +
  geom_point() +
  labs(x = "Domestic R&D Spending",
       y = "Number of MAs for New Orphan Drugs",
       color = "Region",
       title="MAs for new Orphan Drugs by Domestic R&D Spending in the EU and US") +
    scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
  theme_minimal()

Orphan Drug Frequencies by Year by Area

hist(subset(combined_rd_ma_df, area=="US")$freq_orphan_drug_mas, main="US Frequencies of Annual Orphan Drug Market Authorizations", xlab="Number of Annual US Orphan Drug Market Authorizations")

hist(subset(combined_rd_ma_df, area=="EU")$freq_orphan_drug_mas, main="EU Frequencies of Annual Orphan Drug Market Authorizations",xlab="Number of Annual EU Orphan Drug Market Authorizations")

New Orphan Drugs Frequencies by Year by Area

hist(subset(combined_rd_ma_df, area=="US")$freq_new_orphan_drug_mas, main="US Frequencies of Annual New Orphan Drug Market Authorizations", xlab="Number of Annual US Orphan Drug Market Authorizations")

hist(subset(combined_rd_ma_df, area=="EU")$freq_new_orphan_drug_mas, main="EU Frequencies of Annual New Orphan Drug Market Authorizations",xlab="Number of Annual EU Orphan Drug Market Authorizations")

Making a years after 2004 variable

combined_rd_ma_df$yearsafter2004 <- combined_rd_ma_df$year - 2004
combined_rd_ma_df

VIFs & Correlations of preditors

Models to find VIFs

mod.rd.vif.check.rd <- lm(annual_domestic_RD_spending_bil_dollars ~  as.factor(area) + yearsafter2004 +  gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
vif(mod.rd.vif.check.rd)
##                      as.factor(area)                       yearsafter2004 
##                             1.000047                             1.023525 
## gdp_per_capita_annual_growthrate_usd 
##                             1.023572
mod.rd.vif.check.od <- lm(freq_orphan_drug_mas ~  as.factor(area) + yearsafter2004 +  gdp_per_capita_annual_growthrate_usd + annual_domestic_RD_spending_bil_dollars, data=combined_rd_ma_df)
vif(mod.rd.vif.check.od)
##                         as.factor(area)                          yearsafter2004 
##                                1.448334                                3.550554 
##    gdp_per_capita_annual_growthrate_usd annual_domestic_RD_spending_bil_dollars 
##                                1.089107                                3.919797

Correlation Matrix

cor(combined_rd_ma_df[, c("annual_domestic_RD_spending_bil_dollars",
                          "yearsafter2004",
                          "gdp_per_capita_annual_growthrate_usd"
  
)])
##                                         annual_domestic_RD_spending_bil_dollars
## annual_domestic_RD_spending_bil_dollars                             1.000000000
## yearsafter2004                                                      0.783319632
## gdp_per_capita_annual_growthrate_usd                                0.009881313
##                                         yearsafter2004
## annual_domestic_RD_spending_bil_dollars      0.7833196
## yearsafter2004                               1.0000000
## gdp_per_capita_annual_growthrate_usd        -0.1516009
##                                         gdp_per_capita_annual_growthrate_usd
## annual_domestic_RD_spending_bil_dollars                          0.009881313
## yearsafter2004                                                  -0.151600941
## gdp_per_capita_annual_growthrate_usd                             1.000000000

Summmaries

R&D spending growth rates by year

rdspending <- combined_rd_ma_df[combined_rd_ma_df$area == "EU", ]$annual_domestic_RD_spending_bil_dollars 
laggedrdspending <- combined_rd_ma_df[combined_rd_ma_df$area == "EU", ]$lagged_annual_domestic_RD_spending_bil_dollars 
EU_annual_domestic_RD_spending_bil_dollars_growth_rate <- rdspending/laggedrdspending
EU_annual_domestic_RD_spending_bil_dollars_growth_rate
##  [1]        NA 1.0296598 1.1576633 1.1422398 1.0950631 0.9774597 0.9692483
##  [8] 1.0872053 0.9639885 1.0449788 1.0146180 0.9067316 1.0116816 1.0590698
## [15] 1.0736374 0.9868447 1.0691355 1.1101822
rdspending <- combined_rd_ma_df[combined_rd_ma_df$area == "US", ]$annual_domestic_RD_spending_bil_dollars 
laggedrdspending <- combined_rd_ma_df[combined_rd_ma_df$area == "US", ]$lagged_annual_domestic_RD_spending_bil_dollars 
US_annual_domestic_RD_spending_bil_dollars_growth_rate <- rdspending/laggedrdspending
US_annual_domestic_RD_spending_bil_dollars_growth_rate
##  [1]        NA 1.0478253 1.0968355 1.0777352 0.9716650 0.9939530 1.1508117
##  [8] 0.8939616 1.0312479 1.0769337 1.0084489 1.1809938 1.0895376 1.0636573
## [15] 1.1159483 1.0343509 1.1251628 1.0994019
rdspending_growth_rates <- data.frame("Year"=seq(2004,2021), "US"=round(US_annual_domestic_RD_spending_bil_dollars_growth_rate,2), "EU"=round(EU_annual_domestic_RD_spending_bil_dollars_growth_rate,2))

rdspending_growth_rates$US <- as.character(rdspending_growth_rates$US )
rdspending_growth_rates$EU <- as.character(rdspending_growth_rates$EU )

# make 2011 NA because 2011 values were imputed 
rdspending_growth_rates[rdspending_growth_rates$Year == 2011,]$EU <- "NA"
rdspending_growth_rates[rdspending_growth_rates$Year == 2012,]$EU <- "NA"

rdspending_growth_rates
stargazer(rdspending_growth_rates,  out="Final Figures Formatted/RDSpendingAnnualGrowthRates.htm" , summary=FALSE, rownames=FALSE, colnames = TRUE, digits=2, digit.separator="",column.sep.width= "15pt", title="Annual Growth Rate of Phramaceutical R&D Spending")
## 
## % Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
## % Date and time: Wed, Jan 08, 2025 - 03:12:20
## \begin{table}[!htbp] \centering 
##   \caption{Annual Growth Rate of Phramaceutical R&D Spending} 
##   \label{} 
## \begin{tabular}{@{\extracolsep{15pt}} ccc} 
## \\[-1.8ex]\hline 
## \hline \\[-1.8ex] 
## Year & US & EU \\ 
## \hline \\[-1.8ex] 
## $2004$ &  &  \\ 
## $2005$ & 1.05 & 1.03 \\ 
## $2006$ & 1.1 & 1.16 \\ 
## $2007$ & 1.08 & 1.14 \\ 
## $2008$ & 0.97 & 1.1 \\ 
## $2009$ & 0.99 & 0.98 \\ 
## $2010$ & 1.15 & 0.97 \\ 
## $2011$ & 0.89 & NA \\ 
## $2012$ & 1.03 & NA \\ 
## $2013$ & 1.08 & 1.04 \\ 
## $2014$ & 1.01 & 1.01 \\ 
## $2015$ & 1.18 & 0.91 \\ 
## $2016$ & 1.09 & 1.01 \\ 
## $2017$ & 1.06 & 1.06 \\ 
## $2018$ & 1.12 & 1.07 \\ 
## $2019$ & 1.03 & 0.99 \\ 
## $2020$ & 1.13 & 1.07 \\ 
## $2021$ & 1.1 & 1.11 \\ 
## \hline \\[-1.8ex] 
## \end{tabular} 
## \end{table}

Linear Models of Annual Domestic R&D Spending

Function to Plot Linear Models of Annual Domestic R&D Spending

plot_linear_rd <- function(linear_model, model_number){
  
  #Plot residuals and acf/ pacf of residuals by panel (EU/US)
  combined_rd_ma_df$residuals <- resid(linear_model)
  par(mfrow=c(2,3))
  
  us_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "US",]
  eu_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "EU",]
  
  plot(us_resid_data$year, us_resid_data$residuals, xlab="Year", main="US Residuals", ylab="Residuals")
  acf(us_resid_data$residuals, main="ACF of US Residuals")
  pacf(us_resid_data$residuals, main="PACF of US Residuals")
  #qqnorm(us_resid_data$residuals, main = "QQ Plot of US Residuals")
  
  plot(eu_resid_data$year, eu_resid_data$residuals, xlab="Year", main="EU Residuals", ylab="Residuals")
  acf(eu_resid_data$residuals, main="ACF of EU Residuals")
  pacf(eu_resid_data$residuals, main="PACF of EU Residuals")
  #qqnorm(eu_resid_data$residuals, main = "QQ Plot of EU Residuals")
  
  mtext(glue('Linear Model {model_number} of Domestic R&D Spending - Residual Plots'), side = 3, line = -1.1, outer = TRUE)

  predicted <- predict(linear_model , type = "response")

  ggplot(combined_rd_ma_df, aes(x = year, y = annual_domestic_RD_spending_bil_dollars, color = area, alpha="Observed")) +
    geom_point() +
    labs(x = "Year",
         y = "Domestic R&D Spending (Billions USD)",
         alpha = "",
         color = "Region",
         title=glue('Linear Model {model_number} of Annual Domestic R&D Spending'))+
    geom_line(aes(x=year, y=predicted, alpha = "Fitted"))+
    scale_alpha_manual(values = c(0.5, 1), guide = guide_legend(order = 1))+
    scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
  theme_minimal()
    
  # https://aosmith.rbind.io/2020/07/09/ggplot2-override-aes/
  #Controlling legend appearance in ggplot2 with override.aes
  #July 9, 2020 ·  @aosmith16 
  #Ariel Muldoon
  
  # https://www.tidyverse.org/blog/2024/02/ggplot2-3-5-0-legends/
  # Teun van den Brand
  # 2024/02/26

}

Function to Get Model Metrics of Linear Models of Annual Domestic R&D Spending

get_rd_model_metrics <- function(model, model_type){
  
  #Get RMSE
  predicted <- predict(model, type = "response")
  actual <- combined_rd_ma_df$annual_domestic_RD_spending_bil_dollars
  model_rmse <- Metrics::rmse(actual=actual, predicted=predicted)
  
  #Get F stat & pvalue
  model_fstat <- summary(model)$fstatistic[1]
  model_df1 <- summary(model)$fstatistic[2]
  model_df2 <- summary(model)$fstatistic[3]
  model_pval <- pf(model_fstat, model_df1, model_df2, lower.tail = FALSE) 
  # https://www.reneshbedre.com/blog/get-f-stat-p-value-from-lm.html
  # Renesh Bedre
  # March 26, 2023
    
  # Get Adj Rsq and AIC
  model_adjRsq <- summary(model)$adj.r.squared
  model_aic <- AIC(model)
  
  #List metrics in a df
  metrics_df <- data.frame("rmse" = model_rmse, "aic"= model_aic, "df1" = model_df1, "df2" = model_df2, "fstat" = model_fstat, "pval" = model_pval, "adjrsq" = model_adjRsq)
  rownames(metrics_df) <- NULL

  return(metrics_df)
}

Linear Model 1 of Annual Domestic R&D Spending

mod.rd.1 <- lm(annual_domestic_RD_spending_bil_dollars ~  as.factor(area) , data=combined_rd_ma_df)
summary(mod.rd.1)
## 
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area), 
##     data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -16.701  -8.972  -0.418   4.094  33.354 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         38.331      2.667  14.374 5.21e-16 ***
## as.factor(area)US    7.926      3.771   2.102   0.0431 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.31 on 34 degrees of freedom
## Multiple R-squared:  0.115,  Adjusted R-squared:  0.08893 
## F-statistic: 4.416 on 1 and 34 DF,  p-value: 0.04308
plot_linear_rd(mod.rd.1, "1")

#Estimate covariance matrix with Newey West
mod.rd.1.vcov <- vcovPL(mod.rd.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994") 

#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.1 <- coeftest(mod.rd.1, vcov = mod.rd.1.vcov) 
mod.rd.nw.1
## 
## t test of coefficients:
## 
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        38.3309     1.9857 19.3037  < 2e-16 ***
## as.factor(area)US   7.9255     4.0287  1.9673  0.05736 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.1)
##                        2.5 %   97.5 %
## (Intercept)       34.2955306 42.36628
## as.factor(area)US -0.2617976 16.11283
#Get Metrics
mod.rd.1.metrics <- get_rd_model_metrics(mod.rd.1, "Linear")
mod.rd.1.metrics

Linear Model 2 of Annual Domestic R&D Spending

mod.rd.2 <- lm(annual_domestic_RD_spending_bil_dollars ~  as.factor(area) + yearsafter2004, data=combined_rd_ma_df)
summary(mod.rd.2)
## 
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + 
##     yearsafter2004, data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.1660 -5.2646 -0.4164  3.3709 18.3543 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        23.3317     2.2942  10.170 1.06e-11 ***
## as.factor(area)US   7.9255     2.1200   3.738 0.000702 ***
## yearsafter2004      1.7646     0.2043   8.637 5.55e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.36 on 33 degrees of freedom
## Multiple R-squared:  0.7286, Adjusted R-squared:  0.7121 
## F-statistic: 44.28 on 2 and 33 DF,  p-value: 4.528e-10
plot_linear_rd(mod.rd.2, "2")

#Estimate covariance matrix with Newey West
mod.rd.2.vcov <- vcovPL(mod.rd.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994") 

#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.2 <- coeftest(mod.rd.2, vcov = mod.rd.2.vcov) 
mod.rd.nw.2
## 
## t test of coefficients:
## 
##                   Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)       23.33166    3.17686  7.3442 1.969e-08 ***
## as.factor(area)US  7.92552    4.08929  1.9381    0.0612 .  
## yearsafter2004     1.76462    0.20683  8.5316 7.369e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.2)
##                       2.5 %    97.5 %
## (Intercept)       16.868279 29.795037
## as.factor(area)US -0.394208 16.245244
## yearsafter2004     1.343810  2.185424
#Get Metrics
mod.rd.2.metrics <- get_rd_model_metrics(mod.rd.2, "Linear")
mod.rd.2.metrics

Linear Model 3 of Annual Domestic R&D Spending

mod.rd.3 <- lm(annual_domestic_RD_spending_bil_dollars ~  as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
summary(mod.rd.3)
## 
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + 
##     yearsafter2004 + gdp_per_capita_annual_growthrate_usd, data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.2708 -5.4330 -0.3892  3.8556 16.1289 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           22.1364     2.4080   9.193 1.70e-10 ***
## as.factor(area)US                      7.9049     2.0872   3.787 0.000634 ***
## yearsafter2004                         1.8088     0.2035   8.889 3.73e-10 ***
## gdp_per_capita_annual_growthrate_usd   0.2555     0.1785   1.431 0.162018    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.261 on 32 degrees of freedom
## Multiple R-squared:  0.7449, Adjusted R-squared:  0.721 
## F-statistic: 31.14 on 3 and 32 DF,  p-value: 1.296e-09
plot_linear_rd(mod.rd.3, "3")

#Estimate covariance matrix with Newey West
mod.rd.3.vcov <- vcovPL(mod.rd.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")

#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.3 <- coeftest(mod.rd.3, vcov = mod.rd.3.vcov)
mod.rd.nw.3
## 
## t test of coefficients:
## 
##                                      Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                          22.13638    3.26543  6.7790 1.162e-07 ***
## as.factor(area)US                     7.90493    4.12804  1.9149   0.06448 .  
## yearsafter2004                        1.80878    0.19030  9.5051 7.733e-11 ***
## gdp_per_capita_annual_growthrate_usd  0.25552    0.11530  2.2160   0.03392 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.3, level=0.9)
##                                              5 %       95 %
## (Intercept)                          16.60510859 27.6676593
## as.factor(area)US                     0.91249853 14.8973711
## yearsafter2004                        1.48643572  2.1311166
## gdp_per_capita_annual_growthrate_usd  0.06020474  0.4508275
#Get Metrics
mod.rd.3.metrics <- get_rd_model_metrics(mod.rd.3, "Linear")
mod.rd.3.metrics

Linear Model 4 of Annual Domestic R&D Spending

mod.rd.4 <- lm(annual_domestic_RD_spending_bil_dollars ~  as.factor(area) + yearsafter2004 + as.factor(area):yearsafter2004 , data=combined_rd_ma_df)
summary(mod.rd.4)
## 
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + 
##     yearsafter2004 + as.factor(area):yearsafter2004, data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.3946 -3.0224  0.8889  3.1603 11.3927 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       30.2932     2.0930  14.474 1.35e-15 ***
## as.factor(area)US                 -5.9977     2.9599  -2.026   0.0511 .  
## yearsafter2004                     0.9456     0.2102   4.499 8.46e-05 ***
## as.factor(area)US:yearsafter2004   1.6380     0.2972   5.511 4.49e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.626 on 32 degrees of freedom
## Multiple R-squared:  0.8607, Adjusted R-squared:  0.8477 
## F-statistic: 65.92 on 3 and 32 DF,  p-value: 8.626e-14
plot_linear_rd(mod.rd.4, "4")

#Estimate covariance matrix with Newey West
mod.rd.4.vcov <- vcovPL(mod.rd.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994") 

#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.4 <- coeftest(mod.rd.4, vcov = mod.rd.4.vcov) 
mod.rd.nw.4
## 
## t test of coefficients:
## 
##                                  Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                      30.29324    2.15653 14.0472 3.103e-15 ***
## as.factor(area)US                -5.99766    4.71995 -1.2707 0.2129950    
## yearsafter2004                    0.94561    0.19025  4.9702 2.167e-05 ***
## as.factor(area)US:yearsafter2004  1.63802    0.44299  3.6977 0.0008122 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.4)
##                                        2.5 %    97.5 %
## (Intercept)                       25.9005414 34.685948
## as.factor(area)US                -15.6118810  3.616569
## yearsafter2004                     0.5580724  1.333142
## as.factor(area)US:yearsafter2004   0.7356826  2.540358
#Get Metrics
mod.rd.4.metrics <- get_rd_model_metrics(mod.rd.4, "Linear")
mod.rd.4.metrics

Linear Model 5 of Annual Domestic R&D Spending

mod.rd.5 <- lm(annual_domestic_RD_spending_bil_dollars ~  as.factor(area) + yearsafter2004 + yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
summary(mod.rd.5)
## 
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + 
##     yearsafter2004 + yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd, 
##     data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.4132 -2.8428  0.9883  2.4389 10.3907 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           29.4170     2.2497  13.076 3.72e-14 ***
## as.factor(area)US                     -5.5691     2.9831  -1.867   0.0714 .  
## yearsafter2004                         0.9957     0.2152   4.627 6.23e-05 ***
## gdp_per_capita_annual_growthrate_usd   0.1403     0.1335   1.051   0.3013    
## as.factor(area)US:yearsafter2004       1.5863     0.3008   5.273 9.81e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.619 on 31 degrees of freedom
## Multiple R-squared:  0.8655, Adjusted R-squared:  0.8482 
## F-statistic: 49.88 on 4 and 31 DF,  p-value: 4.498e-13
plot_linear_rd(mod.rd.5, "5")

#Estimate covariance matrix with Newey West
mod.rd.5.vcov <- vcovPL(mod.rd.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.5 <- coeftest(mod.rd.5, vcov = mod.rd.5.vcov)
mod.rd.nw.5
## 
## t test of coefficients:
## 
##                                       Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                          29.416980   2.611732 11.2634 1.737e-12 ***
## as.factor(area)US                    -5.569062   4.963283 -1.1221  0.270456    
## yearsafter2004                        0.995731   0.215517  4.6202 6.360e-05 ***
## gdp_per_capita_annual_growthrate_usd  0.140302   0.095988  1.4617  0.153897    
## as.factor(area)US:yearsafter2004      1.586268   0.463832  3.4199  0.001775 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.5)
##                                             2.5 %     97.5 %
## (Intercept)                           24.09031679 34.7436438
## as.factor(area)US                    -15.69174471  4.5536213
## yearsafter2004                         0.55618135  1.4352798
## gdp_per_capita_annual_growthrate_usd  -0.05546634  0.3360705
## as.factor(area)US:yearsafter2004       0.64027629  2.5322596
#Get Metrics
mod.rd.5.metrics <- get_rd_model_metrics(mod.rd.5, "Linear")
mod.rd.5.metrics

Linear Model 6 of Annual Domestic R&D Spending

mod.rd.6 <- lm(annual_domestic_RD_spending_bil_dollars ~  as.factor(area) + yearsafter2004 + yearsafter2004:as.factor(area) + I(yearsafter2004^2) + I(yearsafter2004^2):as.factor(area) +  gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
summary(mod.rd.6)
## 
## Call:
## lm(formula = annual_domestic_RD_spending_bil_dollars ~ as.factor(area) + 
##     yearsafter2004 + yearsafter2004:as.factor(area) + I(yearsafter2004^2) + 
##     I(yearsafter2004^2):as.factor(area) + gdp_per_capita_annual_growthrate_usd, 
##     data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7026 -1.8736  0.1949  1.5479  5.0044 
## 
## Coefficients:
##                                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           27.37392    2.15944  12.676 2.36e-13 ***
## as.factor(area)US                      5.52983    2.63506   2.099  0.04468 *  
## yearsafter2004                         1.81053    0.55772   3.246  0.00295 ** 
## I(yearsafter2004^2)                   -0.04844    0.03102  -1.562  0.12922    
## gdp_per_capita_annual_growthrate_usd   0.11579    0.09217   1.256  0.21906    
## as.factor(area)US:yearsafter2004      -2.59485    0.71851  -3.611  0.00114 ** 
## as.factor(area)US:I(yearsafter2004^2)  0.24648    0.04040   6.102 1.21e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.831 on 29 degrees of freedom
## Multiple R-squared:  0.9527, Adjusted R-squared:  0.943 
## F-statistic: 97.43 on 6 and 29 DF,  p-value: < 2.2e-16
plot_linear_rd(mod.rd.6, "6")

#Estimate covariance matrix with Newey West
mod.rd.6.vcov <- vcovPL(mod.rd.6, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.rd.nw.6 <- coeftest(mod.rd.6, vcov = mod.rd.6.vcov)
mod.rd.nw.6
## 
## t test of coefficients:
## 
##                                        Estimate Std. Error t value  Pr(>|t|)
## (Intercept)                           27.373916   2.705190 10.1190 5.042e-11
## as.factor(area)US                      5.529825   1.494892  3.6991 0.0008995
## yearsafter2004                         1.810535   0.668122  2.7099 0.0111824
## I(yearsafter2004^2)                   -0.048445   0.036708 -1.3197 0.1972472
## gdp_per_capita_annual_growthrate_usd   0.115789   0.071899  1.6104 0.1181367
## as.factor(area)US:yearsafter2004      -2.594854   0.549954 -4.7183 5.547e-05
## as.factor(area)US:I(yearsafter2004^2)  0.246480   0.029974  8.2233 4.569e-09
##                                          
## (Intercept)                           ***
## as.factor(area)US                     ***
## yearsafter2004                        *  
## I(yearsafter2004^2)                      
## gdp_per_capita_annual_growthrate_usd     
## as.factor(area)US:yearsafter2004      ***
## as.factor(area)US:I(yearsafter2004^2) ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.rd.nw.6)
##                                             2.5 %      97.5 %
## (Intercept)                           21.84118268 32.90665020
## as.factor(area)US                      2.47242790  8.58722280
## yearsafter2004                         0.44407169  3.17699759
## I(yearsafter2004^2)                   -0.12352097  0.02663137
## gdp_per_capita_annual_growthrate_usd  -0.03126173  0.26283889
## as.factor(area)US:yearsafter2004      -3.71963522 -1.47007262
## as.factor(area)US:I(yearsafter2004^2)  0.18517747  0.30778301
#Get Metrics
mod.rd.6.metrics <- get_rd_model_metrics(mod.rd.6, "Linear")
mod.rd.6.metrics
mod.rd.metrics <- rbind(mod.rd.1.metrics, mod.rd.2.metrics, mod.rd.3.metrics, mod.rd.4.metrics, mod.rd.5.metrics, mod.rd.6.metrics)

aic_values <- round(mod.rd.metrics$aic)
rmse_values <- round(mod.rd.metrics$rmse, 2)
fstat_values <- round(mod.rd.metrics$fstat, 2)
p_values <- round(mod.rd.metrics$pval, 4)
adjrsq_values <- round(mod.rd.metrics$adjrsq, 2)

stargazer(mod.rd.1, mod.rd.2, mod.rd.3, mod.rd.4, mod.rd.5, mod.rd.6, 
          title="Linear Models of Annual Domestic Pharmaceutical R&D Spending (Not Adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/rdmodelsNonAdjusted.htm",
          covariate.labels = c("US", "Years After 2004", "Years After 2004^{2}","Annual GDP Growth Rate Per Capita", "US:Years After 2004", "US:Years After 2004^{2}", "Intercept"),
          dep.var.labels=c("Annual Domestic Pharmaceutical R&D Spending (Billion USD)"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6"),
          model.numbers=FALSE,
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
## 
## <table style="text-align:center"><caption><strong>Linear Models of Annual Domestic Pharmaceutical R&D Spending (Not Adjusted for Error Structure)</strong></caption>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="6"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="6" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="6">Annual Domestic Pharmaceutical R&D Spending (Billion USD)</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td><td>Model 6</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>7.926<sup>**</sup></td><td>7.926<sup>***</sup></td><td>7.905<sup>***</sup></td><td>-5.998<sup>*</sup></td><td>-5.569<sup>*</sup></td><td>5.530<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(3.771)</td><td>(2.120)</td><td>(2.087)</td><td>(2.960)</td><td>(2.983)</td><td>(2.635)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>1.765<sup>***</sup></td><td>1.809<sup>***</sup></td><td>0.946<sup>***</sup></td><td>0.996<sup>***</sup></td><td>1.811<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.204)</td><td>(0.203)</td><td>(0.210)</td><td>(0.215)</td><td>(0.558)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004<sup>2</sup></td><td></td><td></td><td></td><td></td><td></td><td>-0.048</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td>(0.031)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.256</td><td></td><td>0.140</td><td>0.116</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.179)</td><td></td><td>(0.133)</td><td>(0.092)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">US:Years After 2004</td><td></td><td></td><td></td><td>1.638<sup>***</sup></td><td>1.586<sup>***</sup></td><td>-2.595<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.297)</td><td>(0.301)</td><td>(0.719)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">US:Years After 2004<sup>2</sup></td><td></td><td></td><td></td><td></td><td></td><td>0.246<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td>(0.040)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>38.331<sup>***</sup></td><td>23.332<sup>***</sup></td><td>22.136<sup>***</sup></td><td>30.293<sup>***</sup></td><td>29.417<sup>***</sup></td><td>27.374<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(2.667)</td><td>(2.294)</td><td>(2.408)</td><td>(2.093)</td><td>(2.250)</td><td>(2.159)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>281</td><td>240</td><td>240</td><td>218</td><td>219</td><td>185</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>11</td><td>6.09</td><td>5.9</td><td>4.36</td><td>4.29</td><td>2.54</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.09</td><td>0.71</td><td>0.72</td><td>0.85</td><td>0.85</td><td>0.94</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>4.42</td><td>44.28</td><td>31.14</td><td>65.92</td><td>49.88</td><td>97.43</td></tr>
## <tr><td style="text-align:left">p-value</td><td>0.0431</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="6" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models
m1 <- mod.rd.nw.1 
m2 <- mod.rd.nw.2 
m3 <- mod.rd.nw.3
m4 <- mod.rd.nw.4
m5 <- mod.rd.nw.5 
m6 <- mod.rd.nw.6

stargazer(m1, m2, m3, m4, m5, m6, title="Newey-West Adjusted Linear Models of Annual Domestic Pharmaceutical R&D Spending", align=TRUE, type = "html", out="Final Models Formatted/rdmodels.htm",
          covariate.labels = c("US", "Years After 2004", "Years After 2004^{2}","Annual GDP Growth Rate Per Capita", "US:Years After 2004", "US:Years After 2004^{2}", "Intercept"),
          dep.var.labels=c("Annual Domestic Pharmaceutical R&D Spending (Billion USD)"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6"),
          omit.stat = c("all"),
          model.numbers=FALSE,
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
## 
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Linear Models of Annual Domestic Pharmaceutical R&D Spending</strong></caption>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="6"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="6" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="6">Annual Domestic Pharmaceutical R&D Spending (Billion USD)</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td><td>Model 6</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>7.926<sup>*</sup></td><td>7.926<sup>*</sup></td><td>7.905<sup>*</sup></td><td>-5.998</td><td>-5.569</td><td>5.530<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(4.029)</td><td>(4.089)</td><td>(4.128)</td><td>(4.720)</td><td>(4.963)</td><td>(1.495)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>1.765<sup>***</sup></td><td>1.809<sup>***</sup></td><td>0.946<sup>***</sup></td><td>0.996<sup>***</sup></td><td>1.811<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.207)</td><td>(0.190)</td><td>(0.190)</td><td>(0.216)</td><td>(0.668)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004<sup>2</sup></td><td></td><td></td><td></td><td></td><td></td><td>-0.048</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td>(0.037)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.256<sup>**</sup></td><td></td><td>0.140</td><td>0.116</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.115)</td><td></td><td>(0.096)</td><td>(0.072)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">US:Years After 2004</td><td></td><td></td><td></td><td>1.638<sup>***</sup></td><td>1.586<sup>***</sup></td><td>-2.595<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.443)</td><td>(0.464)</td><td>(0.550)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">US:Years After 2004<sup>2</sup></td><td></td><td></td><td></td><td></td><td></td><td>0.246<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td>(0.030)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>38.331<sup>***</sup></td><td>23.332<sup>***</sup></td><td>22.136<sup>***</sup></td><td>30.293<sup>***</sup></td><td>29.417<sup>***</sup></td><td>27.374<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(1.986)</td><td>(3.177)</td><td>(3.265)</td><td>(2.157)</td><td>(2.612)</td><td>(2.705)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>281</td><td>240</td><td>240</td><td>218</td><td>219</td><td>185</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>11</td><td>6.09</td><td>5.9</td><td>4.36</td><td>4.29</td><td>2.54</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.09</td><td>0.71</td><td>0.72</td><td>0.85</td><td>0.85</td><td>0.94</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>4.42</td><td>44.28</td><td>31.14</td><td>65.92</td><td>49.88</td><td>97.43</td></tr>
## <tr><td style="text-align:left">p-value</td><td>0.0431</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="7" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="6" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>

Linear Models of Number of Orphan Drug Market Authorizations

Function to Plot Models of Number of Orphan Drug Market Authorizations (Linear or Poisson Models)

plot_lin_pois_od <- function(model, model_number, model_type){
  
  #Plot residuals and acf/ pacf of residuals by panel (EU/US)
  predicted <- predict(model , type = "response")
  actual <- combined_rd_ma_df$freq_orphan_drug_mas

  combined_rd_ma_df$residuals <- actual-predicted#resid(model)
  par(mfrow =c(2,3))
  
  us_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "US",]
  eu_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "EU",]
  
  plot(us_resid_data$year, us_resid_data$residuals, xlab="Year", main="US Residuals", ylab="Residuals")
  acf(us_resid_data$residuals, main="ACF of US Residuals")
  pacf(us_resid_data$residuals, main="PACF of US Residuals")
  #qqnorm(us_resid_data$residuals, main = "QQ Plot of US Residuals")
  
  plot(eu_resid_data$year, eu_resid_data$residuals, xlab="Year", main="EU Residuals", ylab="Residuals")
  acf(eu_resid_data$residuals, main="ACF of EU Residuals")
  pacf(eu_resid_data$residuals, main="PACF of EU Residuals")
  #qqnorm(eu_resid_data$residuals, main = "QQ Plot of EU Residuals")
  
  
  mtext(glue('{model_type} Model {model_number} of Orphan Drug MAs - Residual Plots'), side = 3, line = -1.1, outer = TRUE)

  
  ggplot(combined_rd_ma_df, aes(x = year, y = freq_orphan_drug_mas, color = area, alpha="Observed")) +
    geom_point() +
    labs(x = "Year",
         y = str_wrap("Number of Orphan Drug Market Authorizations", width = 40),
         color = "Region",
         alpha = " ",
         title=str_wrap(glue("{model_type} Model {model_number} of Annual Orphan Drug Market Authorizations"), width = 60))+
    scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
    geom_line(aes(x=year, y=predicted, alpha = "Fitted"))+
    scale_alpha_manual(values = c(0.5, 1))+
  theme_minimal()
    
  # https://aosmith.rbind.io/2020/07/09/ggplot2-override-aes/
  #Controlling legend appearance in ggplot2 with override.aes
  #July 9, 2020 ·  @aosmith16 
  
  # https://www.tidyverse.org/blog/2024/02/ggplot2-3-5-0-legends/
  #Teun van den Brand
  #2024/02/26

}

Get Metrics of Models of Number of Orphan Drug Market Authorizations (Linear or Poisson Models)

get_od_model_metrics <- function(model, model_type){
  
  #Get RMSE
  predicted <- predict(model, type = "response")
  actual <- combined_rd_ma_df$freq_orphan_drug_mas
  model_rmse <- Metrics::rmse(actual=actual, predicted=predicted)
  

  model_df1 <- NA
  model_df2 <- NA
  model_adjRsq <- NA
  model_fstat <- NA
  model_pval <- NA
  model_loglik <- NA
  
  if(model_type == "Linear"){
    model_df1 <- summary(model)$fstatistic["numdf"]
    model_df2 <- summary(model)$fstatistic["dendf"]
    model_fstat <- summary(model)$fstatistic["value"]
    model_pval <- pf(model_fstat, model_df1, model_df2, lower.tail = FALSE) 
    # https://www.reneshbedre.com/blog/get-f-stat-p-value-from-lm.html
    # Renesh Bedre
    # March 26, 2023
    model_adjRsq <- summary(model)$adj.r.squared
  }
  
  if(model_type == "Poisson"){
    model_loglik <- logLik(model)
  }
  
  model_aic <- AIC(model)
  
  metrics_df <- data.frame("rmse" = model_rmse, "aic"= model_aic, "df1" = model_df1, "df2" = model_df2, "fstat" = model_fstat, "pval" = model_pval, "adjrsq" = model_adjRsq, "LogLik" = model_loglik)
  rownames(metrics_df) <- NULL


  return(metrics_df)
}

Linear Model 1 of Num of Annual Orphan Drug MAs

mod.od.1 <- lm(freq_orphan_drug_mas ~ as.factor(area), data=combined_rd_ma_df)
summary(mod.od.1)
## 
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area), data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.667 -11.667  -2.028   4.653  50.333 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         11.389      5.055   2.253   0.0308 *  
## as.factor(area)US   32.278      7.149   4.515 7.24e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21.45 on 34 degrees of freedom
## Multiple R-squared:  0.3748, Adjusted R-squared:  0.3564 
## F-statistic: 20.38 on 1 and 34 DF,  p-value: 7.244e-05
plot_lin_pois_od(mod.od.1, "1", "Linear")

#Estimate covariance matrix with Newey West
mod.od.1.vcov <- vcovPL(mod.od.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994") 

#Adjust standard errors based on estimated covariance matrix
mod.od.nw.1 <-coeftest(mod.od.1, vcov = mod.od.1.vcov)
mod.od.nw.1
## 
## t test of coefficients:
## 
##                   Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)        11.3889     1.3974  8.1500 1.663e-09 ***
## as.factor(area)US  32.2778     9.9247  3.2523  0.002587 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.od.nw.1)
##                       2.5 %   97.5 %
## (Intercept)        8.549014 14.22876
## as.factor(area)US 12.108452 52.44710
mod.od.1.metrics <- get_od_model_metrics(mod.od.1, "Linear")
mod.od.1.metrics

Linear Model 2 of Num of Annual Orphan Drug MAs

mod.od.2 <- lm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004, data=combined_rd_ma_df)
summary(mod.od.2)
## 
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004, 
##     data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.551 -11.583  -2.681  11.086  34.966 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -12.3611     5.6427  -2.191   0.0356 *  
## as.factor(area)US  32.2778     5.2143   6.190 5.52e-07 ***
## yearsafter2004      2.7941     0.5025   5.560 3.53e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.64 on 33 degrees of freedom
## Multiple R-squared:  0.6772, Adjusted R-squared:  0.6576 
## F-statistic: 34.62 on 2 and 33 DF,  p-value: 7.892e-09
plot_lin_pois_od(mod.od.2, "2", "Linear")

#Estimate covariance matrix with Newey West
mod.od.2.vcov <- vcovPL(mod.od.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994") 

#Adjust standard errors based on estimated covariance matrix
mod.od.nw.2 <-coeftest(mod.od.2, vcov = mod.od.2.vcov)
mod.od.nw.2
## 
## t test of coefficients:
## 
##                    Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)       -12.36111    7.51879 -1.6440    0.1097    
## as.factor(area)US  32.27778   10.07391  3.2041    0.0030 ** 
## yearsafter2004      2.79412    0.37637  7.4239 1.571e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.od.nw.2)
##                        2.5 %    97.5 %
## (Intercept)       -27.658208  2.935986
## as.factor(area)US  11.782261 52.773295
## yearsafter2004      2.028395  3.559841
mod.od.2.metrics <- get_od_model_metrics(mod.od.2, "Linear")

Linear Model 3 of Num of Annual Orphan Drug MAs

mod.od.3 <- lm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
summary(mod.od.3)
## 
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     gdp_per_capita_annual_growthrate_usd, data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.932 -11.179  -3.044  12.434  33.530 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                          -15.0152     5.9576  -2.520   0.0169 *  
## as.factor(area)US                     32.2321     5.1638   6.242 5.40e-07 ***
## yearsafter2004                         2.8922     0.5035   5.745 2.28e-06 ***
## gdp_per_capita_annual_growthrate_usd   0.5674     0.4417   1.285   0.2081    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.49 on 32 degrees of freedom
## Multiple R-squared:  0.693,  Adjusted R-squared:  0.6643 
## F-statistic: 24.08 on 3 and 32 DF,  p-value: 2.419e-08
plot_lin_pois_od(mod.od.3, "3", "Linear")

#Estimate covariance matrix with Newey West
mod.od.3.vcov <- vcovPL(mod.od.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.od.nw.3 <-coeftest(mod.od.3, vcov = mod.od.3.vcov)
mod.od.nw.3
## 
## t test of coefficients:
## 
##                                       Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                          -15.01520    6.14421 -2.4438  0.020228 *  
## as.factor(area)US                     32.23207   10.02166  3.2162  0.002967 ** 
## yearsafter2004                         2.89217    0.28886 10.0125 2.199e-11 ***
## gdp_per_capita_annual_growthrate_usd   0.56737    0.27724  2.0465  0.049000 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.od.nw.3)
##                                              2.5 %    97.5 %
## (Intercept)                          -27.530557154 -2.499846
## as.factor(area)US                     11.818618997 52.645527
## yearsafter2004                         2.303792315  3.480552
## gdp_per_capita_annual_growthrate_usd   0.002641035  1.132100
mod.od.3.metrics <- get_od_model_metrics(mod.od.3, "Linear")
vif(mod.od.3)
##                      as.factor(area)                       yearsafter2004 
##                             1.000047                             1.023525 
## gdp_per_capita_annual_growthrate_usd 
##                             1.023572

Linear Model 4 of Num of Annual Orphan Drug MAs

mod.od.4 <- lm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + as.factor(area):yearsafter2004, data=combined_rd_ma_df)
summary(mod.od.4)
## 
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     as.factor(area):yearsafter2004, data = combined_rd_ma_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -20.4159  -4.9394  -0.3846   5.8571  22.4417 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        6.9942     4.4174   1.583    0.123    
## as.factor(area)US                 -6.4327     6.2472  -1.030    0.311    
## yearsafter2004                     0.5170     0.4436   1.166    0.252    
## as.factor(area)US:yearsafter2004   4.5542     0.6273   7.260    3e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.764 on 32 degrees of freedom
## Multiple R-squared:  0.8781, Adjusted R-squared:  0.8666 
## F-statistic:  76.8 on 3 and 32 DF,  p-value: 1.04e-14
plot_lin_pois_od(mod.od.4, "4", "Linear")

#Estimate covariance matrix with Newey West
mod.od.4.vcov <- vcovPL(mod.od.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994") 

#Adjust standard errors based on estimated covariance matrix
mod.od.nw.4 <-coeftest(mod.od.4, vcov = mod.od.4.vcov)
mod.od.nw.4
## 
## t test of coefficients:
## 
##                                  Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                       6.99415    1.95792  3.5722  0.001145 ** 
## as.factor(area)US                -6.43275    7.43836 -0.8648  0.393582    
## yearsafter2004                    0.51703    0.16584  3.1176  0.003839 ** 
## as.factor(area)US:yearsafter2004  4.55418    0.70803  6.4322 3.125e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.od.nw.4)
##                                        2.5 %     97.5 %
## (Intercept)                        3.0059898 10.9823143
## as.factor(area)US                -21.5842014  8.7187043
## yearsafter2004                     0.1792165  0.8548392
## as.factor(area)US:yearsafter2004   3.1119702  5.9963890
mod.od.4.metrics <- get_od_model_metrics(mod.od.4, "Linear")

predicted <- predict(mod.od.4, type = "response")

Linear Model 5 of Num of Annual Orphan Drug MAs

mod.od.5 <- lm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
summary(mod.od.5)
## 
## Call:
## lm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd, 
##     data = combined_rd_ma_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -20.0780  -4.9553  -0.3644   5.8711  22.0730 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            5.4758     4.7757   1.147    0.260    
## as.factor(area)US                     -5.6901     6.3325  -0.899    0.376    
## yearsafter2004                         0.6039     0.4568   1.322    0.196    
## gdp_per_capita_annual_growthrate_usd   0.2431     0.2834   0.858    0.398    
## as.factor(area)US:yearsafter2004       4.4645     0.6386   6.992 7.58e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.805 on 31 degrees of freedom
## Multiple R-squared:  0.8809, Adjusted R-squared:  0.8655 
## F-statistic: 57.31 on 4 and 31 DF,  p-value: 6.978e-14
plot_lin_pois_od(mod.od.5, "5", "Linear")

#Estimate covariance matrix with Newey West
mod.od.5.vcov <- vcovPL(mod.od.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.od.nw.5 <-coeftest(mod.od.5, vcov = mod.od.5.vcov)
mod.od.nw.5
## 
## t test of coefficients:
## 
##                                      Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                           5.47584    1.81680  3.0140  0.005104 ** 
## as.factor(area)US                    -5.69012    7.99710 -0.7115  0.482081    
## yearsafter2004                        0.60388    0.17722  3.4075  0.001834 ** 
## gdp_per_capita_annual_growthrate_usd  0.24310    0.14954  1.6257  0.114144    
## as.factor(area)US:yearsafter2004      4.46451    0.76598  5.8285 2.001e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.od.nw.5)
##                                             2.5 %     97.5 %
## (Intercept)                            1.77044391  9.1812268
## as.factor(area)US                    -22.00031835 10.6200844
## yearsafter2004                         0.24243509  0.9653205
## gdp_per_capita_annual_growthrate_usd  -0.06188618  0.5480933
## as.factor(area)US:yearsafter2004       2.90228697  6.0267275
mod.od.5.metrics <- get_od_model_metrics(mod.od.5, "Linear")

predicted <- predict(mod.od.5, type = "response")
mod.od.metrics <- rbind(mod.od.1.metrics, mod.od.2.metrics, mod.od.3.metrics, mod.od.4.metrics, mod.od.5.metrics)
aic_values <- round(mod.od.metrics$aic)
rmse_values <- round(mod.od.metrics$rmse, 2)
fstat_values <- round(mod.od.metrics$fstat, 2)
p_values <- round(mod.od.metrics$pval, 4)
adjrsq_values <- round(mod.od.metrics$adjrsq, 2)


stargazer(mod.od.1, mod.od.2, mod.od.3, mod.od.4, mod.od.5, title="Linear Models of Orphan Drug Market Authorizations (Not Adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/orphandrugMAmodelsNonAdjusted.htm",
          covariate.labels = c("US", "Years After 2004", "Annual GDP Growth Rate Per Capita", "Years After 2004:US", "Intercept"),
          dep.var.labels=c("Annual Number of Orphan Drug Market Authorizations"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
          model.numbers=FALSE,
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
## 
## <table style="text-align:center"><caption><strong>Linear Models of Orphan Drug Market Authorizations (Not Adjusted for Error Structure)</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Orphan Drug Market Authorizations</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>32.278<sup>***</sup></td><td>32.278<sup>***</sup></td><td>32.232<sup>***</sup></td><td>-6.433</td><td>-5.690</td></tr>
## <tr><td style="text-align:left"></td><td>(7.149)</td><td>(5.214)</td><td>(5.164)</td><td>(6.247)</td><td>(6.333)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>2.794<sup>***</sup></td><td>2.892<sup>***</sup></td><td>0.517</td><td>0.604</td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.503)</td><td>(0.503)</td><td>(0.444)</td><td>(0.457)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.567</td><td></td><td>0.243</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.442)</td><td></td><td>(0.283)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US</td><td></td><td></td><td></td><td>4.554<sup>***</sup></td><td>4.465<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.627)</td><td>(0.639)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>11.389<sup>**</sup></td><td>-12.361<sup>**</sup></td><td>-15.015<sup>**</sup></td><td>6.994</td><td>5.476</td></tr>
## <tr><td style="text-align:left"></td><td>(5.055)</td><td>(5.643)</td><td>(5.958)</td><td>(4.417)</td><td>(4.776)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>327</td><td>305</td><td>305</td><td>272</td><td>273</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>20.84</td><td>14.98</td><td>14.61</td><td>9.21</td><td>9.1</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.36</td><td>0.66</td><td>0.66</td><td>0.87</td><td>0.87</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>20.38</td><td>34.62</td><td>24.08</td><td>76.8</td><td>57.31</td></tr>
## <tr><td style="text-align:left">p-value</td><td>1e-04</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models

m1 <- mod.od.nw.1
m2 <- mod.od.nw.2
m3 <- mod.od.nw.3
m4 <- mod.od.nw.4
m5 <- mod.od.nw.5 

stargazer(m1,m2,m3,m4,m5, title="Newey-West Adjusted Linear Models of Orphan Drug Market Authorizations", align=TRUE, type = "html", out="Final Models Formatted/orphandrugMAmodels.htm",
          covariate.labels = c("US", "Years After 2004", "Annual GDP Growth Rate Per Capita", "Years After 2004:US", "Intercept"),
          dep.var.labels=c("Annual Number of Orphan Drug Market Authorizations"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
          model.numbers=FALSE,
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
## 
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Linear Models of Orphan Drug Market Authorizations</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Orphan Drug Market Authorizations</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>32.278<sup>***</sup></td><td>32.278<sup>***</sup></td><td>32.232<sup>***</sup></td><td>-6.433</td><td>-5.690</td></tr>
## <tr><td style="text-align:left"></td><td>(9.925)</td><td>(10.074)</td><td>(10.022)</td><td>(7.438)</td><td>(7.997)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>2.794<sup>***</sup></td><td>2.892<sup>***</sup></td><td>0.517<sup>***</sup></td><td>0.604<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.376)</td><td>(0.289)</td><td>(0.166)</td><td>(0.177)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.567<sup>**</sup></td><td></td><td>0.243</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.277)</td><td></td><td>(0.150)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US</td><td></td><td></td><td></td><td>4.554<sup>***</sup></td><td>4.465<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.708)</td><td>(0.766)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>11.389<sup>***</sup></td><td>-12.361</td><td>-15.015<sup>**</sup></td><td>6.994<sup>***</sup></td><td>5.476<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(1.397)</td><td>(7.519)</td><td>(6.144)</td><td>(1.958)</td><td>(1.817)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>327</td><td>305</td><td>305</td><td>272</td><td>273</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>20.84</td><td>14.98</td><td>14.61</td><td>9.21</td><td>9.1</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.36</td><td>0.66</td><td>0.66</td><td>0.87</td><td>0.87</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>20.38</td><td>34.62</td><td>24.08</td><td>76.8</td><td>57.31</td></tr>
## <tr><td style="text-align:left">p-value</td><td>1e-04</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>

Poisson Models of Number of Orphan Drug Market Authorization Models

Poisson Model 1 of Num of Annual Orphan Drug MAs

Newey-West adjustment for glms with panel data: Zeileis, Achim, and Thomas Lumley. Package “Sandwich” Title Robust Covariance Matrix Estimators. 15 Sept. 2020, pp. 4–6, cran.r-project.org/web/packages/sandwich/sandwich.pdf.

Aghion P, Van Reenen J, Zingales L (2013). “Innovation and Institutional Ownership.” The American Economic Review, 103(1), 277–304. doi:10.1257/aer.103.1.277 https://www.aeaweb.org/articles?id=10.1257/aer.103.1.277

mod.pois.od.1 <- glm(freq_orphan_drug_mas ~ as.factor(area), data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.od.1)
## 
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area), family = poisson(link = "log"), 
##     data = combined_rd_ma_df)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        2.43264    0.06984   34.83   <2e-16 ***
## as.factor(area)US  1.34395    0.07842   17.14   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 741.47  on 35  degrees of freedom
## Residual deviance: 378.01  on 34  degrees of freedom
## AIC: 554.08
## 
## Number of Fisher Scoring iterations: 5
plot_lin_pois_od(mod.pois.od.1, "1", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.od.1.vcov <- vcovPL(mod.pois.od.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.1 <- coeftest(mod.pois.od.1, vcov = mod.pois.od.1.vcov)
mod.pois.od.nw.1
## 
## z test of coefficients:
## 
##                   Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)        2.43264    0.12270 19.8260 < 2.2e-16 ***
## as.factor(area)US  1.34395    0.17073  7.8717 3.498e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.1)
##                      2.5 %   97.5 %
## (Intercept)       2.192152 2.673124
## as.factor(area)US 1.009320 1.678574
mod.pois.od.1.metrics <- get_od_model_metrics(mod.pois.od.1, "Poisson")
mod.pois.od.1.metrics

Poisson Model 2 of Num of Annual Orphan Drug MAs

mod.pois.od.2 <- glm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.od.2)
## 
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004, 
##     family = poisson(link = "log"), data = combined_rd_ma_df)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       1.365511   0.102632   13.30   <2e-16 ***
## as.factor(area)US 1.343947   0.078423   17.14   <2e-16 ***
## yearsafter2004    0.107718   0.006695   16.09   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 741.469  on 35  degrees of freedom
## Residual deviance:  95.044  on 33  degrees of freedom
## AIC: 273.11
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_od(mod.pois.od.2, "2", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.od.2.vcov <- vcovPL(mod.pois.od.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.2 <- coeftest(mod.pois.od.2, vcov = mod.pois.od.2.vcov)
mod.pois.od.nw.2
## 
## z test of coefficients:
## 
##                   Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)        1.36551    0.22395  6.0974 1.078e-09 ***
## as.factor(area)US  1.34395    0.17330  7.7551 8.829e-15 ***
## yearsafter2004     0.10772    0.01183  9.1057 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.2)
##                        2.5 %    97.5 %
## (Intercept)       0.92657995 1.8044420
## as.factor(area)US 1.00428727 1.6836064
## yearsafter2004    0.08453205 0.1309037
mod.pois.od.2.metrics <- get_od_model_metrics(mod.pois.od.2, "Poisson")
mod.pois.od.2.metrics

Poisson Model 3 of Num of Annual Orphan Drug MAs

mod.pois.od.3 <- glm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.od.3)
## 
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     gdp_per_capita_annual_growthrate_usd, family = poisson(link = "log"), 
##     data = combined_rd_ma_df)
## 
## Coefficients:
##                                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                          1.348465   0.103085   13.08   <2e-16 ***
## as.factor(area)US                    1.338517   0.078467   17.06   <2e-16 ***
## yearsafter2004                       0.106749   0.006686   15.96   <2e-16 ***
## gdp_per_capita_annual_growthrate_usd 0.009457   0.007059    1.34     0.18    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 741.47  on 35  degrees of freedom
## Residual deviance:  93.24  on 32  degrees of freedom
## AIC: 273.3
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_od(mod.pois.od.3, "3", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.od.3.vcov <- vcovPL(mod.pois.od.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.3 <- coeftest(mod.pois.od.3, vcov = mod.pois.od.3.vcov)
mod.pois.od.nw.3
## 
## z test of coefficients:
## 
##                                       Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)                          1.3484650  0.1964902  6.8628 6.754e-12 ***
## as.factor(area)US                    1.3385166  0.1775705  7.5379 4.774e-14 ***
## yearsafter2004                       0.1067490  0.0117507  9.0845 < 2.2e-16 ***
## gdp_per_capita_annual_growthrate_usd 0.0094568  0.0137365  0.6884    0.4912    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.3)
##                                            2.5 %     97.5 %
## (Intercept)                           0.96335126 1.73357880
## as.factor(area)US                     0.99048474 1.68654841
## yearsafter2004                        0.08371814 0.12977990
## gdp_per_capita_annual_growthrate_usd -0.01746631 0.03637987
mod.pois.od.3.metrics <- get_od_model_metrics(mod.pois.od.3, "Poisson")
mod.pois.od.3.metrics

Poisson Model 4 of Num of Annual Orphan Drug MAs

mod.pois.od.4 <- glm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + as.factor(area):yearsafter2004, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.od.4)
## 
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     as.factor(area):yearsafter2004, family = poisson(link = "log"), 
##     data = combined_rd_ma_df)
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                       2.01416    0.15033  13.399  < 2e-16 ***
## as.factor(area)US                 0.48931    0.17884   2.736 0.006220 ** 
## yearsafter2004                    0.04591    0.01369   3.353 0.000799 ***
## as.factor(area)US:yearsafter2004  0.07982    0.01573   5.074  3.9e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 741.469  on 35  degrees of freedom
## Residual deviance:  69.927  on 32  degrees of freedom
## AIC: 249.99
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_od(mod.pois.od.4, "4", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.od.4.vcov <- vcovPL(mod.pois.od.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.4 <- coeftest(mod.pois.od.4, vcov = mod.pois.od.4.vcov)
mod.pois.od.nw.4
## 
## z test of coefficients:
## 
##                                  Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)                      2.014163   0.217571  9.2575 < 2.2e-16 ***
## as.factor(area)US                0.489305   0.210912  2.3200  0.020343 *  
## yearsafter2004                   0.045913   0.016889  2.7185  0.006557 ** 
## as.factor(area)US:yearsafter2004 0.079825   0.016730  4.7712 1.831e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.4)
##                                       2.5 %     97.5 %
## (Intercept)                      1.58773263 2.44059407
## as.factor(area)US                0.07592652 0.90268446
## yearsafter2004                   0.01281162 0.07901529
## as.factor(area)US:yearsafter2004 0.04703351 0.11261575
mod.pois.od.4.metrics <- get_od_model_metrics(mod.pois.od.4, "Poisson")
mod.pois.od.4.metrics

Poisson Model 5 of Num of Annual Orphan Drug MAs

mod.pois.od.5 <- glm(freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd , data=combined_rd_ma_df, family=poisson(link="log")) 
summary(mod.pois.od.5)
## 
## Call:
## glm(formula = freq_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd, 
##     family = poisson(link = "log"), data = combined_rd_ma_df)
## 
## Coefficients:
##                                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                          1.986725   0.153650  12.930  < 2e-16 ***
## as.factor(area)US                    0.507058   0.179465   2.825 0.004722 ** 
## yearsafter2004                       0.047062   0.013655   3.446 0.000568 ***
## gdp_per_capita_annual_growthrate_usd 0.005395   0.006887   0.783 0.433467    
## as.factor(area)US:yearsafter2004     0.077857   0.015827   4.919 8.68e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 741.469  on 35  degrees of freedom
## Residual deviance:  69.311  on 31  degrees of freedom
## AIC: 251.38
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_od(mod.pois.od.5, "5", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.od.5.vcov <- vcovPL(mod.pois.od.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.od.nw.5 <- coeftest(mod.pois.od.5, vcov = mod.pois.od.5.vcov)
mod.pois.od.nw.5
## 
## z test of coefficients:
## 
##                                       Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)                          1.9867252  0.2009717  9.8856 < 2.2e-16 ***
## as.factor(area)US                    0.5070576  0.2032372  2.4949  0.012599 *  
## yearsafter2004                       0.0470621  0.0164600  2.8592  0.004247 ** 
## gdp_per_capita_annual_growthrate_usd 0.0053946  0.0100360  0.5375  0.590905    
## as.factor(area)US:yearsafter2004     0.0778573  0.0160970  4.8368  1.32e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(mod.pois.od.nw.5)
##                                            2.5 %     97.5 %
## (Intercept)                           1.59282782 2.38062260
## as.factor(area)US                     0.10872005 0.90539517
## yearsafter2004                        0.01480104 0.07932320
## gdp_per_capita_annual_growthrate_usd -0.01427566 0.02506492
## as.factor(area)US:yearsafter2004      0.04630775 0.10940681
mod.pois.od.5.metrics <- get_od_model_metrics(mod.pois.od.5, "Poisson")
mod.pois.od.5.metrics
mod.od.pois.metrics <- rbind(mod.pois.od.1.metrics, mod.pois.od.2.metrics, mod.pois.od.3.metrics, mod.pois.od.4.metrics, mod.pois.od.5.metrics)
mod.od.pois.metrics
mod.od.pois.metrics <- rbind(mod.pois.od.1.metrics, mod.pois.od.2.metrics, mod.pois.od.3.metrics, mod.pois.od.4.metrics, mod.pois.od.5.metrics)

aic_values <- round(mod.od.pois.metrics$aic)
rmse_values <- round(mod.od.pois.metrics$rmse, 2)
logLik_vlues <- round(mod.od.pois.metrics$LogLik, 2)


#creating shorter names because stargazer has a length limit for the input of all models

m1 <- mod.pois.od.1
m2 <- mod.pois.od.2
m3 <- mod.pois.od.3
m4 <- mod.pois.od.4
m5 <- mod.pois.od.5 


stargazer(m1, m2, m3, m4, m5, title="Poisson Models of Annual Orphan Drug Market Authorizations (Not adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/PoissonorphandrugMAmodelsNonAdjusted.htm",
          covariate.labels = c("US", "Years After 2004", "Annual GDP Growth Rate Per Capita", "Years After 2004:US", "Intercept"),
          dep.var.labels=c("Annual Number of Orphan Drug Market Authorizations"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
          model.numbers=FALSE,
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Log Likelihood", logLik_vlues)))
## 
## <table style="text-align:center"><caption><strong>Poisson Models of Annual Orphan Drug Market Authorizations (Not adjusted for Error Structure)</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Orphan Drug Market Authorizations</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>1.344<sup>***</sup></td><td>1.344<sup>***</sup></td><td>1.339<sup>***</sup></td><td>0.489<sup>***</sup></td><td>0.507<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.078)</td><td>(0.078)</td><td>(0.078)</td><td>(0.179)</td><td>(0.179)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>0.108<sup>***</sup></td><td>0.107<sup>***</sup></td><td>0.046<sup>***</sup></td><td>0.047<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.007)</td><td>(0.007)</td><td>(0.014)</td><td>(0.014)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.009</td><td></td><td>0.005</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.007)</td><td></td><td>(0.007)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US</td><td></td><td></td><td></td><td>0.080<sup>***</sup></td><td>0.078<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.016)</td><td>(0.016)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>2.433<sup>***</sup></td><td>1.366<sup>***</sup></td><td>1.348<sup>***</sup></td><td>2.014<sup>***</sup></td><td>1.987<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.070)</td><td>(0.103)</td><td>(0.103)</td><td>(0.150)</td><td>(0.154)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>554</td><td>273</td><td>273</td><td>250</td><td>251</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>20.84</td><td>8.34</td><td>8.45</td><td>7.54</td><td>7.63</td></tr>
## <tr><td style="text-align:left">Log Likelihood</td><td>-275.04</td><td>-133.55</td><td>-132.65</td><td>-121</td><td>-120.69</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models

m1 <- mod.pois.od.nw.1
m2 <- mod.pois.od.nw.2
m3 <- mod.pois.od.nw.3
m4 <- mod.pois.od.nw.4
m5 <- mod.pois.od.nw.5 

stargazer(m1,m2,m3,m4,m5, title="Newey-West Adjusted Poisson Models of Orphan Drug Market Authorizations", align=TRUE, type = "html", out="Final Models Formatted/PoissonorphandrugMAmodels.htm",
          covariate.labels = c("US", "Years After 2004", "Annual GDP Growth Rate Per Capita", "Years After 2004:US", "Intercept"),
          dep.var.labels=c("Annual Number of Orphan Drug Market Authorizations"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
          model.numbers=FALSE,
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Log Likelihood", logLik_vlues)))
## 
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Poisson Models of Orphan Drug Market Authorizations</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Orphan Drug Market Authorizations</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>1.344<sup>***</sup></td><td>1.344<sup>***</sup></td><td>1.339<sup>***</sup></td><td>0.489<sup>**</sup></td><td>0.507<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.171)</td><td>(0.173)</td><td>(0.178)</td><td>(0.211)</td><td>(0.203)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>0.108<sup>***</sup></td><td>0.107<sup>***</sup></td><td>0.046<sup>***</sup></td><td>0.047<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.012)</td><td>(0.012)</td><td>(0.017)</td><td>(0.016)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.009</td><td></td><td>0.005</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.014)</td><td></td><td>(0.010)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US</td><td></td><td></td><td></td><td>0.080<sup>***</sup></td><td>0.078<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.017)</td><td>(0.016)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>2.433<sup>***</sup></td><td>1.366<sup>***</sup></td><td>1.348<sup>***</sup></td><td>2.014<sup>***</sup></td><td>1.987<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.123)</td><td>(0.224)</td><td>(0.196)</td><td>(0.218)</td><td>(0.201)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>554</td><td>273</td><td>273</td><td>250</td><td>251</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>20.84</td><td>8.34</td><td>8.45</td><td>7.54</td><td>7.63</td></tr>
## <tr><td style="text-align:left">Log Likelihood</td><td>-275.04</td><td>-133.55</td><td>-132.65</td><td>-121</td><td>-120.69</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>

Linear Models of Number of Market Authorizations for NEW Orphan Drugs

Plot Models of Number of Orphan Drug Market Authorizations for NEW Orphan Drugs (Linear or Poisson Models)

plot_lin_pois_new_od <- function(model, model_number, model_type){
  
  #Plot residuals and acf/ pacf of residuals by panel (EU/US)
  predicted <- predict(model , type = "response")
  actual <- combined_rd_ma_df$freq_new_orphan_drug_mas

  combined_rd_ma_df$residuals <- actual-predicted#resid(model)
  
  par(mfrow =c(2,3))
  
  us_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "US",]
  eu_resid_data <- combined_rd_ma_df[combined_rd_ma_df$area == "EU",]
  
  plot(us_resid_data$year, us_resid_data$residuals, xlab="Year", main="US Residuals", ylab="Residuals")
  acf(us_resid_data$residuals, main="ACF of US Residuals")
  pacf(us_resid_data$residuals, main="PACF of US Residuals")
  #qqnorm(us_resid_data$residuals, main = "QQ Plot of US Residuals")
  
  plot(eu_resid_data$year, eu_resid_data$residuals, xlab="Year", main="EU Residuals", ylab="Residuals")
  acf(eu_resid_data$residuals, main="ACF of EU Residuals")
  pacf(eu_resid_data$residuals, main="PACF of EU Residuals")
  #qqnorm(eu_resid_data$residuals, main = "QQ Plot of EU Residuals")
  
  mtext(glue('{model_type} Model {model_number} of MAs for New Orphan Drugs - Residual Plots'), side = 3, line = -1.1, outer = TRUE)

  ggplot(combined_rd_ma_df, aes(x = year, y = freq_new_orphan_drug_mas, color = area, alpha="Observed")) +
    geom_point() +
    labs(x = "Year",
         y = str_wrap("Number of Market Authorizations for New Orphan Drugs", width = 40),
         color = "Region",
         alpha = " ",
         title=str_wrap(glue("{model_type} Model {model_number} of Market Authorizations for New Orphan Drugs"), width = 60))+
    scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
    geom_line(aes(x=year, y=predicted, alpha = "Fitted"))+
    scale_alpha_manual(values = c(0.5, 1))+
  theme_minimal()
    
  # https://aosmith.rbind.io/2020/07/09/ggplot2-override-aes/
  #Controlling legend appearance in ggplot2 with override.aes
  #July 9, 2020 ·  @aosmith16 
  
  # https://www.tidyverse.org/blog/2024/02/ggplot2-3-5-0-legends/
  #Teun van den Brand
  #2024/02/26

}

Get Metrics of Models of Number of Market Authorizations for NEW Orphan Drugs (Linear or Poisson Models)

get_new_od_model_metrics <- function(model, model_type){
  #Get RMSE
  predicted <- predict(model, type = "response")
  actual <- combined_rd_ma_df$freq_new_orphan_drug_mas
  model_rmse <- Metrics::rmse(actual=actual, predicted=predicted)
  
  model_df1 <- NA
  model_df2 <- NA
  model_adjRsq <- NA
  model_fstat <- NA
  model_pval <- NA
  model_loglik <- NA
  
  if(model_type == "Linear"){
    
    model_df1 <- summary(model)$fstatistic[2]
    model_df2 <- summary(model)$fstatistic[3]
    model_fstat <- summary(model)$fstatistic[1]
    model_pval <- pf(model_fstat, model_df1, model_df2, lower.tail = FALSE) 
    # https://www.reneshbedre.com/blog/get-f-stat-p-value-from-lm.html
    # Renesh Bedre
    # March 26, 2023
  
    model_adjRsq <- summary(model)$adj.r.squared
  }
  
  if(model_type == "Poisson"){
    model_loglik <- logLik(model)
  }
  
  model_aic <- AIC(model)
  
  metrics_df <- data.frame("rmse" = model_rmse, "aic"= model_aic, "df1" = model_df1, "df2" = model_df2, "fstat" = model_fstat, "pval" = model_pval, "adjrsq" = model_adjRsq, "LogLik" = model_loglik)
  rownames(metrics_df) <- NULL


  return(metrics_df)
}

Linear Model 1 of Annual MAs for NEW orphan drugs

mod.new.od.1 <- lm(freq_new_orphan_drug_mas ~ as.factor(area), data=combined_rd_ma_df)
summary(mod.new.od.1)
## 
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area), data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -14.667  -6.611  -1.667   4.347  25.333 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         10.611      2.372   4.473 8.20e-05 ***
## as.factor(area)US   15.056      3.355   4.488 7.85e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.06 on 34 degrees of freedom
## Multiple R-squared:  0.372,  Adjusted R-squared:  0.3535 
## F-statistic: 20.14 on 1 and 34 DF,  p-value: 7.85e-05
plot_lin_pois_new_od(mod.new.od.1, "1", "Linear")

#Estimate covariance matrix with Newey West
mod.new.od.1.vcov <- vcovPL(mod.new.od.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.1 <-coeftest(mod.new.od.1, vcov = mod.new.od.1.vcov)
mod.new.od.nw.1
## 
## t test of coefficients:
## 
##                   Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)        10.6111     1.5389  6.8954  6.07e-08 ***
## as.factor(area)US  15.0556     3.4953  4.3074 0.0001331 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.1.metrics <- get_new_od_model_metrics(mod.new.od.1, "Linear")
confint(mod.new.od.nw.1)
##                      2.5 %   97.5 %
## (Intercept)       7.483735 13.73849
## as.factor(area)US 7.952272 22.15884

Linear Model 2 of Annual MAs for NEW orphan drugs

mod.new.od.2 <- lm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004, data=combined_rd_ma_df)
summary(mod.new.od.2)
## 
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004, 
##     data = combined_rd_ma_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.9653  -3.6720  -0.9102   3.6305  14.5400 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -1.621      2.381  -0.681    0.501    
## as.factor(area)US   15.056      2.200   6.844 8.25e-08 ***
## yearsafter2004       1.439      0.212   6.788 9.69e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.6 on 33 degrees of freedom
## Multiple R-squared:  0.7379, Adjusted R-squared:  0.722 
## F-statistic: 46.45 on 2 and 33 DF,  p-value: 2.539e-10
plot_lin_pois_new_od(mod.new.od.2, "2", "Linear")

#Estimate covariance matrix with Newey West
mod.new.od.2.vcov <- vcovPL(mod.new.od.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.2 <-coeftest(mod.new.od.2, vcov = mod.new.od.2.vcov)
mod.new.od.nw.2
## 
## t test of coefficients:
## 
##                   Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)       -1.62135    2.67095 -0.6070 0.5479860    
## as.factor(area)US 15.05556    3.54785  4.2436 0.0001677 ***
## yearsafter2004     1.43911    0.13805 10.4249 5.653e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.2.metrics <- get_new_od_model_metrics(mod.new.od.2, "Linear")

Linear Model 3 of Annual MAs for NEW orphan drugs

mod.new.od.3 <- lm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
summary(mod.new.od.3)
## 
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     gdp_per_capita_annual_growthrate_usd, data = combined_rd_ma_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.2793  -4.3715  -0.0866   3.1109  15.3070 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                           -2.4307     2.5443  -0.955    0.347    
## as.factor(area)US                     15.0416     2.2053   6.821 1.03e-07 ***
## yearsafter2004                         1.4690     0.2150   6.832 9.99e-08 ***
## gdp_per_capita_annual_growthrate_usd   0.1730     0.1886   0.917    0.366    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.616 on 32 degrees of freedom
## Multiple R-squared:  0.7446, Adjusted R-squared:  0.7207 
## F-statistic:  31.1 on 3 and 32 DF,  p-value: 1.318e-09
plot_lin_pois_new_od(mod.new.od.3, "3", "Linear")

#Estimate covariance matrix with Newey West
mod.new.od.3.vcov <- vcovPL(mod.new.od.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.3 <-coeftest(mod.new.od.3, vcov = mod.new.od.3.vcov)
mod.new.od.nw.3
## 
## t test of coefficients:
## 
##                                      Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                          -2.43067    2.04420 -1.1891 0.2431670    
## as.factor(area)US                    15.04162    3.49062  4.3092 0.0001459 ***
## yearsafter2004                        1.46901    0.12625 11.6357 4.919e-13 ***
## gdp_per_capita_annual_growthrate_usd  0.17301    0.17010  1.0171 0.3167178    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.3.metrics <- get_new_od_model_metrics(mod.new.od.3, "Linear")

confint(mod.new.od.nw.3)
##                                           2.5 %     97.5 %
## (Intercept)                          -6.5945632  1.7332329
## as.factor(area)US                     7.9314571 22.1517802
## yearsafter2004                        1.2118478  1.7261772
## gdp_per_capita_annual_growthrate_usd -0.1734628  0.5194828

Linear Model 4 of Annual MAs for NEW orphan drugs

mod.new.od.4 <- lm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + as.factor(area):yearsafter2004, data=combined_rd_ma_df)
summary(mod.new.od.4)
## 
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     as.factor(area):yearsafter2004, data = combined_rd_ma_df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.5191  -2.7497   0.2246   3.6370   8.5067 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        5.2164     2.2767   2.291  0.02868 *  
## as.factor(area)US                  1.3801     3.2197   0.429  0.67105    
## yearsafter2004                     0.6347     0.2286   2.776  0.00912 ** 
## as.factor(area)US:yearsafter2004   1.6089     0.3233   4.976 2.13e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.032 on 32 degrees of freedom
## Multiple R-squared:  0.8522, Adjusted R-squared:  0.8384 
## F-statistic: 61.52 on 3 and 32 DF,  p-value: 2.213e-13
plot_lin_pois_new_od(mod.new.od.4, "4", "Linear")

#Estimate covariance matrix with Newey West
mod.new.od.4.vcov <- vcovPL(mod.new.od.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.4 <-coeftest(mod.new.od.4, vcov = mod.new.od.4.vcov)
mod.new.od.nw.4
## 
## t test of coefficients:
## 
##                                  Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                       5.21637    1.45018  3.5971   0.00107 ** 
## as.factor(area)US                 1.38012    2.39490  0.5763   0.56846    
## yearsafter2004                    0.63467    0.13336  4.7590 3.999e-05 ***
## as.factor(area)US:yearsafter2004  1.60888    0.24304  6.6198 1.827e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.4.metrics <- get_new_od_model_metrics(mod.new.od.4, "Linear")

confint(mod.new.od.nw.4)
##                                       2.5 %    97.5 %
## (Intercept)                       2.2624561 8.1702924
## as.factor(area)US                -3.4981245 6.2583584
## yearsafter2004                    0.3630212 0.9063287
## as.factor(area)US:yearsafter2004  1.1138186 2.1039316
predicted <- predict(mod.new.od.4, type = "response")

# Plot of Annual Orphan Drug Market Authorizations by R&D spending
ggplot(combined_rd_ma_df, aes(x = annual_domestic_RD_spending_bil_dollars, y = freq_new_orphan_drug_mas, color = area, alpha="Observed")) +
  geom_point() +
  labs(x = "Domestic R&D Spending (Billion USD)",
       y = str_wrap("Number of Market Authorizations for New Orphan Drugs", width = 40),
       color = "Region",
       alpha = "",
       title=str_wrap(glue("Linear Model 4 of Market Authorizations for New Orphan Drugs by Domestic R&D Spending"), width = 60))+
  scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
  geom_line(aes(x=annual_domestic_RD_spending_bil_dollars, y=predicted, alpha = "Fitted"))+
  scale_alpha_manual(values = c(0.5, 1))

theme_minimal()
## List of 136
##  $ line                            :List of 6
##   ..$ colour       : chr "black"
##   ..$ linewidth    : num 0.5
##   ..$ linetype     : num 1
##   ..$ lineend      : chr "butt"
##   ..$ arrow        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_line" "element"
##  $ rect                            :List of 5
##   ..$ fill         : chr "white"
##   ..$ colour       : chr "black"
##   ..$ linewidth    : num 0.5
##   ..$ linetype     : num 1
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_rect" "element"
##  $ text                            :List of 11
##   ..$ family       : chr ""
##   ..$ face         : chr "plain"
##   ..$ colour       : chr "black"
##   ..$ size         : num 11
##   ..$ hjust        : num 0.5
##   ..$ vjust        : num 0.5
##   ..$ angle        : num 0
##   ..$ lineheight   : num 0.9
##   ..$ margin       : 'margin' num [1:4] 0points 0points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : logi FALSE
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ title                           : NULL
##  $ aspect.ratio                    : NULL
##  $ axis.title                      : NULL
##  $ axis.title.x                    :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 2.75points 0points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.x.top                :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 0
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 2.75points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.x.bottom             : NULL
##  $ axis.title.y                    :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : num 90
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 2.75points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.title.y.left               : NULL
##  $ axis.title.y.right              :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : num -90
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 0points 2.75points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text                       :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : chr "grey30"
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.x                     :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 1
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 2.2points 0points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.x.top                 :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : NULL
##   ..$ vjust        : num 0
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 2.2points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.x.bottom              : NULL
##  $ axis.text.y                     :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 1
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 2.2points 0points 0points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.y.left                : NULL
##  $ axis.text.y.right               :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 0points 0points 2.2points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.text.theta                 : NULL
##  $ axis.text.r                     :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0.5
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : 'margin' num [1:4] 0points 2.2points 0points 2.2points
##   .. ..- attr(*, "unit")= int 8
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ axis.ticks                      : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ axis.ticks.x                    : NULL
##  $ axis.ticks.x.top                : NULL
##  $ axis.ticks.x.bottom             : NULL
##  $ axis.ticks.y                    : NULL
##  $ axis.ticks.y.left               : NULL
##  $ axis.ticks.y.right              : NULL
##  $ axis.ticks.theta                : NULL
##  $ axis.ticks.r                    : NULL
##  $ axis.minor.ticks.x.top          : NULL
##  $ axis.minor.ticks.x.bottom       : NULL
##  $ axis.minor.ticks.y.left         : NULL
##  $ axis.minor.ticks.y.right        : NULL
##  $ axis.minor.ticks.theta          : NULL
##  $ axis.minor.ticks.r              : NULL
##  $ axis.ticks.length               : 'simpleUnit' num 2.75points
##   ..- attr(*, "unit")= int 8
##  $ axis.ticks.length.x             : NULL
##  $ axis.ticks.length.x.top         : NULL
##  $ axis.ticks.length.x.bottom      : NULL
##  $ axis.ticks.length.y             : NULL
##  $ axis.ticks.length.y.left        : NULL
##  $ axis.ticks.length.y.right       : NULL
##  $ axis.ticks.length.theta         : NULL
##  $ axis.ticks.length.r             : NULL
##  $ axis.minor.ticks.length         : 'rel' num 0.75
##  $ axis.minor.ticks.length.x       : NULL
##  $ axis.minor.ticks.length.x.top   : NULL
##  $ axis.minor.ticks.length.x.bottom: NULL
##  $ axis.minor.ticks.length.y       : NULL
##  $ axis.minor.ticks.length.y.left  : NULL
##  $ axis.minor.ticks.length.y.right : NULL
##  $ axis.minor.ticks.length.theta   : NULL
##  $ axis.minor.ticks.length.r       : NULL
##  $ axis.line                       : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ axis.line.x                     : NULL
##  $ axis.line.x.top                 : NULL
##  $ axis.line.x.bottom              : NULL
##  $ axis.line.y                     : NULL
##  $ axis.line.y.left                : NULL
##  $ axis.line.y.right               : NULL
##  $ axis.line.theta                 : NULL
##  $ axis.line.r                     : NULL
##  $ legend.background               : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ legend.margin                   : 'margin' num [1:4] 5.5points 5.5points 5.5points 5.5points
##   ..- attr(*, "unit")= int 8
##  $ legend.spacing                  : 'simpleUnit' num 11points
##   ..- attr(*, "unit")= int 8
##  $ legend.spacing.x                : NULL
##  $ legend.spacing.y                : NULL
##  $ legend.key                      : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ legend.key.size                 : 'simpleUnit' num 1.2lines
##   ..- attr(*, "unit")= int 3
##  $ legend.key.height               : NULL
##  $ legend.key.width                : NULL
##  $ legend.key.spacing              : 'simpleUnit' num 5.5points
##   ..- attr(*, "unit")= int 8
##  $ legend.key.spacing.x            : NULL
##  $ legend.key.spacing.y            : NULL
##  $ legend.frame                    : NULL
##  $ legend.ticks                    : NULL
##  $ legend.ticks.length             : 'rel' num 0.2
##  $ legend.axis.line                : NULL
##  $ legend.text                     :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : 'rel' num 0.8
##   ..$ hjust        : NULL
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ legend.text.position            : NULL
##  $ legend.title                    :List of 11
##   ..$ family       : NULL
##   ..$ face         : NULL
##   ..$ colour       : NULL
##   ..$ size         : NULL
##   ..$ hjust        : num 0
##   ..$ vjust        : NULL
##   ..$ angle        : NULL
##   ..$ lineheight   : NULL
##   ..$ margin       : NULL
##   ..$ debug        : NULL
##   ..$ inherit.blank: logi TRUE
##   ..- attr(*, "class")= chr [1:2] "element_text" "element"
##  $ legend.title.position           : NULL
##  $ legend.position                 : chr "right"
##  $ legend.position.inside          : NULL
##  $ legend.direction                : NULL
##  $ legend.byrow                    : NULL
##  $ legend.justification            : chr "center"
##  $ legend.justification.top        : NULL
##  $ legend.justification.bottom     : NULL
##  $ legend.justification.left       : NULL
##  $ legend.justification.right      : NULL
##  $ legend.justification.inside     : NULL
##  $ legend.location                 : NULL
##  $ legend.box                      : NULL
##  $ legend.box.just                 : NULL
##  $ legend.box.margin               : 'margin' num [1:4] 0cm 0cm 0cm 0cm
##   ..- attr(*, "unit")= int 1
##  $ legend.box.background           : list()
##   ..- attr(*, "class")= chr [1:2] "element_blank" "element"
##  $ legend.box.spacing              : 'simpleUnit' num 11points
##   ..- attr(*, "unit")= int 8
##   [list output truncated]
##  - attr(*, "class")= chr [1:2] "theme" "gg"
##  - attr(*, "complete")= logi TRUE
##  - attr(*, "validate")= logi TRUE

Linear Model 5 of Annual MAs for NEW orphan drugs

mod.new.od.5 <- lm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df)
summary(mod.new.od.5)
## 
## Call:
## lm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd, 
##     data = combined_rd_ma_df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.439  -2.862   0.288   3.660   8.842 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                            4.8560     2.4843   1.955  0.05969 .  
## as.factor(area)US                      1.5564     3.2941   0.472  0.63990    
## yearsafter2004                         0.6553     0.2376   2.758  0.00967 ** 
## gdp_per_capita_annual_growthrate_usd   0.0577     0.1474   0.391  0.69814    
## as.factor(area)US:yearsafter2004       1.5876     0.3322   4.779 4.04e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.1 on 31 degrees of freedom
## Multiple R-squared:  0.853,  Adjusted R-squared:  0.834 
## F-statistic: 44.96 on 4 and 31 DF,  p-value: 1.77e-12
plot_lin_pois_new_od(mod.new.od.5, "5", "Linear")

#Estimate covariance matrix with Newey West
mod.new.od.5.vcov <- vcovPL(mod.new.od.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.new.od.nw.5 <-coeftest(mod.new.od.5, vcov = mod.new.od.5.vcov)
mod.new.od.nw.5
## 
## t test of coefficients:
## 
##                                      Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)                           4.85601    1.44648  3.3571  0.002096 ** 
## as.factor(area)US                     1.55638    2.48325  0.6268  0.535411    
## yearsafter2004                        0.65529    0.14399  4.5509 7.746e-05 ***
## gdp_per_capita_annual_growthrate_usd  0.05770    0.13160  0.4385  0.664093    
## as.factor(area)US:yearsafter2004      1.58759    0.26367  6.0212 1.155e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.new.od.5.metrics <- get_new_od_model_metrics(mod.new.od.5, "Linear")

confint(mod.new.od.nw.5)
##                                           2.5 %    97.5 %
## (Intercept)                           1.9058856 7.8061280
## as.factor(area)US                    -3.5082423 6.6209986
## yearsafter2004                        0.3616198 0.9489572
## gdp_per_capita_annual_growthrate_usd -0.2106931 0.3260928
## as.factor(area)US:yearsafter2004      1.0498405 2.1253429
predicted <- predict(mod.new.od.5, type = "response")

# Plot of Annual Orphan Drug Market Authorizations by R&D spending
ggplot(combined_rd_ma_df, aes(x = annual_domestic_RD_spending_bil_dollars, y = freq_new_orphan_drug_mas, color = area, alpha="Observed")) +
  geom_point() +
  labs(x = "Domestic R&D Spending (Billion USD)",
       y = str_wrap("Number of Market Authorizations for New Orphan Drugs", width = 40),
       color = "Region",
       alpha = "",
       title=str_wrap(glue("Linear Model 5 of Market Authorizations for New Orphan Drugs by Domestic R&D Spending"), width = 60))+
  scale_color_manual(name = "Region", values = c('US' = 'darkblue', 'EU' = '#E66100')) +
  geom_line(aes(x=annual_domestic_RD_spending_bil_dollars, y=predicted, alpha = "Fitted"))+
  scale_alpha_manual(values = c(0.5, 1))+
theme_minimal()

mod.new.od.metrics <- rbind(mod.new.od.1.metrics, mod.new.od.2.metrics, mod.new.od.3.metrics, mod.new.od.4.metrics, mod.new.od.5.metrics)
aic_values <- round(mod.new.od.metrics$aic)
rmse_values <- round(mod.new.od.metrics$rmse, 2)
fstat_values <- round(mod.new.od.metrics$fstat, 2)
p_values <- round(mod.new.od.metrics$pval, 4)
adjrsq_values <- round(mod.new.od.metrics$adjrsq, 2)

#creating shorter names because stargazer has a length limit for the input of all models

m1 <- mod.new.od.1
m2 <- mod.new.od.2
m3 <- mod.new.od.3
m4 <- mod.new.od.4
m5 <- mod.new.od.5 

stargazer(m1, m2, m3, m4, m5, title="Linear Models of Market Authorizations for New Orphan Drugs (Not adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/NEWorphandrugMAmodelsNonAdjusted.htm",
          covariate.labels = c("US", "Years After 2004", "Annual GDP Growth Rate Per Capita", "Years After 2004:US", "Intercept"),
          dep.var.labels=c("Annual Number of Market Authorizations for New Orphan Drugs"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
          model.numbers=FALSE,
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
## 
## <table style="text-align:center"><caption><strong>Linear Models of Market Authorizations for New Orphan Drugs (Not adjusted for Error Structure)</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorizations for New Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>15.056<sup>***</sup></td><td>15.056<sup>***</sup></td><td>15.042<sup>***</sup></td><td>1.380</td><td>1.556</td></tr>
## <tr><td style="text-align:left"></td><td>(3.355)</td><td>(2.200)</td><td>(2.205)</td><td>(3.220)</td><td>(3.294)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>1.439<sup>***</sup></td><td>1.469<sup>***</sup></td><td>0.635<sup>***</sup></td><td>0.655<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.212)</td><td>(0.215)</td><td>(0.229)</td><td>(0.238)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.173</td><td></td><td>0.058</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.189)</td><td></td><td>(0.147)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US</td><td></td><td></td><td></td><td>1.609<sup>***</sup></td><td>1.588<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.323)</td><td>(0.332)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>10.611<sup>***</sup></td><td>-1.621</td><td>-2.431</td><td>5.216<sup>**</sup></td><td>4.856<sup>*</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(2.372)</td><td>(2.381)</td><td>(2.544)</td><td>(2.277)</td><td>(2.484)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>272</td><td>243</td><td>244</td><td>224</td><td>226</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>9.78</td><td>6.32</td><td>6.24</td><td>4.74</td><td>4.73</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.35</td><td>0.72</td><td>0.72</td><td>0.84</td><td>0.83</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>20.14</td><td>46.45</td><td>31.1</td><td>61.52</td><td>44.96</td></tr>
## <tr><td style="text-align:left">p-value</td><td>1e-04</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models

m1 <- mod.new.od.nw.1
m2 <- mod.new.od.nw.2
m3 <- mod.new.od.nw.3
m4 <- mod.new.od.nw.4
m5 <- mod.new.od.nw.5 


stargazer(m1, m2, m3, m4, m5, title="Newey-West Adjusted Linear Models of Market Authorizations for New Orphan Drugs", align=TRUE, type = "html", out="Final Models Formatted/NEWorphandrugMAmodels.htm",
          covariate.labels = c("US", "Years After 2004", "Annual GDP Growth Rate Per Capita", "Years After 2004:US", "Intercept"),
          dep.var.labels=c("Annual Number of Market Authorizations for New Orphan Drugs"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
          model.numbers=FALSE,
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Adj R^{2}", adjrsq_values), c("F Statistic", fstat_values), c("p-value", p_values)))
## 
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Linear Models of Market Authorizations for New Orphan Drugs</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorizations for New Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>15.056<sup>***</sup></td><td>15.056<sup>***</sup></td><td>15.042<sup>***</sup></td><td>1.380</td><td>1.556</td></tr>
## <tr><td style="text-align:left"></td><td>(3.495)</td><td>(3.548)</td><td>(3.491)</td><td>(2.395)</td><td>(2.483)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>1.439<sup>***</sup></td><td>1.469<sup>***</sup></td><td>0.635<sup>***</sup></td><td>0.655<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.138)</td><td>(0.126)</td><td>(0.133)</td><td>(0.144)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.173</td><td></td><td>0.058</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.170)</td><td></td><td>(0.132)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US</td><td></td><td></td><td></td><td>1.609<sup>***</sup></td><td>1.588<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.243)</td><td>(0.264)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>10.611<sup>***</sup></td><td>-1.621</td><td>-2.431</td><td>5.216<sup>***</sup></td><td>4.856<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(1.539)</td><td>(2.671)</td><td>(2.044)</td><td>(1.450)</td><td>(1.446)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>272</td><td>243</td><td>244</td><td>224</td><td>226</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>9.78</td><td>6.32</td><td>6.24</td><td>4.74</td><td>4.73</td></tr>
## <tr><td style="text-align:left">Adj R<sup>2</sup></td><td>0.35</td><td>0.72</td><td>0.72</td><td>0.84</td><td>0.83</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>20.14</td><td>46.45</td><td>31.1</td><td>61.52</td><td>44.96</td></tr>
## <tr><td style="text-align:left">p-value</td><td>1e-04</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>

Overall mean of Domestic R&D spending

mean(combined_rd_ma_df$annual_domestic_RD_spending_bil_dollars)
## [1] 42.29366

Overall mean of US Domestic R&D spending

mean(subset(combined_rd_ma_df ,area=="US")$annual_domestic_RD_spending_bil_dollars)
## [1] 46.25642

Overall mean of annual EU Domestic R&D spending

mean(subset(combined_rd_ma_df ,area=="EU")$annual_domestic_RD_spending_bil_dollars)
## [1] 38.3309

Overall mean of annual number of US Market Authroized Orphan Drugs

mean(subset(combined_rd_ma_df ,area=="US")$freq_orphan_drug_mas)
## [1] 43.66667
mean(subset(combined_rd_ma_df ,area=="US")$freq_new_orphan_drug_mas)
## [1] 25.66667

Overall mean of annual number of EU Market Authroized Orphan Drugs

mean(subset(combined_rd_ma_df ,area=="EU")$freq_orphan_drug_mas)
## [1] 11.38889
mean(subset(combined_rd_ma_df ,area=="EU")$freq_new_orphan_drug_mas)
## [1] 10.61111

Poisson Models for Number of Market Authorizations for NEW Orphan Drugs

Poisson model 1 of Annual Num of MAs for NEW orphan drugs

mod.pois.new.od.1 <- glm(freq_new_orphan_drug_mas ~ as.factor(area), data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.new.od.1)
## 
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area), family = poisson(link = "log"), 
##     data = combined_rd_ma_df)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        2.36190    0.07236   32.64   <2e-16 ***
## as.factor(area)US  0.88329    0.08602   10.27   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 275.24  on 35  degrees of freedom
## Residual deviance: 159.30  on 34  degrees of freedom
## AIC: 326.27
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.1, "1", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.new.od.1.vcov <- vcovPL(mod.pois.new.od.1, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.1 <- coeftest(mod.pois.new.od.1, vcov = mod.pois.new.od.1.vcov)
mod.pois.new.od.nw.1
## 
## z test of coefficients:
## 
##                   Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)       2.361902   0.145025 16.2862 < 2.2e-16 ***
## as.factor(area)US 0.883291   0.093369  9.4603 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.1.metrics <- get_new_od_model_metrics(mod.pois.new.od.1, "Poisson")
mod.pois.new.od.1.metrics
confint(mod.pois.new.od.nw.1)
##                       2.5 %   97.5 %
## (Intercept)       2.0776576 2.646146
## as.factor(area)US 0.7002924 1.066291

Poisson model 2 of Annual Num of MAs for NEW orphan drugs

mod.pois.new.od.2 <- glm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.new.od.2)
## 
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004, 
##     family = poisson(link = "log"), data = combined_rd_ma_df)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       1.573894   0.111327   14.14   <2e-16 ***
## as.factor(area)US 0.883291   0.086024   10.27   <2e-16 ***
## yearsafter2004    0.082198   0.007955   10.33   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 275.236  on 35  degrees of freedom
## Residual deviance:  46.701  on 33  degrees of freedom
## AIC: 215.67
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.2, "2", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.new.od.2.vcov <- vcovPL(mod.pois.new.od.2, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.2 <- coeftest(mod.pois.new.od.2, vcov = mod.pois.new.od.2.vcov)
mod.pois.new.od.nw.2
## 
## z test of coefficients:
## 
##                    Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)       1.5738944  0.1099422 14.3157 < 2.2e-16 ***
## as.factor(area)US 0.8832915  0.0947727  9.3201 < 2.2e-16 ***
## yearsafter2004    0.0821981  0.0065146 12.6175 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.2.metrics <- get_new_od_model_metrics(mod.pois.new.od.2, "Poisson")
mod.pois.new.od.2.metrics
confint(mod.pois.new.od.nw.2)
##                        2.5 %     97.5 %
## (Intercept)       1.35841158 1.78937715
## as.factor(area)US 0.69754035 1.06904257
## yearsafter2004    0.06942973 0.09496652

Poisson model 3 of Annual Num of MAs for NEW orphan drugs

mod.pois.new.od.3 <- glm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df, family=poisson(link="log"))
summary(mod.pois.new.od.3)
## 
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     gdp_per_capita_annual_growthrate_usd, family = poisson(link = "log"), 
##     data = combined_rd_ma_df)
## 
## Coefficients:
##                                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                          1.5723230  0.1127702  13.943   <2e-16 ***
## as.factor(area)US                    0.8828314  0.0861801  10.244   <2e-16 ***
## yearsafter2004                       0.0821682  0.0079577  10.326   <2e-16 ***
## gdp_per_capita_annual_growthrate_usd 0.0006891  0.0079618   0.087    0.931    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 275.236  on 35  degrees of freedom
## Residual deviance:  46.693  on 32  degrees of freedom
## AIC: 217.66
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.3, "3", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.new.od.3.vcov <- vcovPL(mod.pois.new.od.3, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.3 <- coeftest(mod.pois.new.od.3, vcov = mod.pois.new.od.3.vcov)
mod.pois.new.od.nw.3
## 
## z test of coefficients:
## 
##                                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                          1.57232299 0.10319966 15.2357   <2e-16 ***
## as.factor(area)US                    0.88283138 0.10090233  8.7494   <2e-16 ***
## yearsafter2004                       0.08216822 0.00652632 12.5903   <2e-16 ***
## gdp_per_capita_annual_growthrate_usd 0.00068905 0.01170778  0.0589   0.9531    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.3.metrics <- get_new_od_model_metrics(mod.pois.new.od.3, "Poisson")
mod.pois.new.od.3.metrics
confint(mod.pois.new.od.nw.3)
##                                            2.5 %     97.5 %
## (Intercept)                           1.37005537 1.77459062
## as.factor(area)US                     0.68506645 1.08059631
## yearsafter2004                        0.06937687 0.09495957
## gdp_per_capita_annual_growthrate_usd -0.02225778 0.02363589

Poisson model 4 of Annual Num of MAs for NEW orphan drugs

mod.pois.new.od.4 <- glm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + as.factor(area):yearsafter2004, data=combined_rd_ma_df, family=poisson(link="log")) 
summary(mod.pois.new.od.4)
## 
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     as.factor(area):yearsafter2004, family = poisson(link = "log"), 
##     data = combined_rd_ma_df)
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                       1.79374    0.16228  11.053  < 2e-16 ***
## as.factor(area)US                 0.56583    0.19821   2.855  0.00431 ** 
## yearsafter2004                    0.06101    0.01437   4.246 2.17e-05 ***
## as.factor(area)US:yearsafter2004  0.03028    0.01726   1.754  0.07948 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 275.236  on 35  degrees of freedom
## Residual deviance:  43.649  on 32  degrees of freedom
## AIC: 214.62
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.4, "4", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.new.od.4.vcov <- vcovPL(mod.pois.new.od.4, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.4 <- coeftest(mod.pois.new.od.4, vcov = mod.pois.new.od.4.vcov)
mod.pois.new.od.nw.4
## 
## z test of coefficients:
## 
##                                  Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)                      1.793738   0.180770  9.9228 < 2.2e-16 ***
## as.factor(area)US                0.565835   0.197166  2.8698  0.004107 ** 
## yearsafter2004                   0.061008   0.014373  4.2446  2.19e-05 ***
## as.factor(area)US:yearsafter2004 0.030275   0.014985  2.0203  0.043350 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.4.metrics <- get_new_od_model_metrics(mod.pois.new.od.4, "Poisson")
mod.pois.new.od.4.metrics
confint(mod.pois.new.od.nw.4)
##                                         2.5 %     97.5 %
## (Intercept)                      1.4394360681 2.14803953
## as.factor(area)US                0.1793969585 0.95227281
## yearsafter2004                   0.0328372741 0.08917868
## as.factor(area)US:yearsafter2004 0.0009044294 0.05964557

Poisson model 5 of Annual Num of MAs for NEW orphan drugs

mod.pois.new.od.5 <- glm(freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd, data=combined_rd_ma_df, family=poisson(link="log")) 
summary(mod.pois.new.od.5)
## 
## Call:
## glm(formula = freq_new_orphan_drug_mas ~ as.factor(area) + yearsafter2004 + 
##     yearsafter2004:as.factor(area) + gdp_per_capita_annual_growthrate_usd, 
##     family = poisson(link = "log"), data = combined_rd_ma_df)
## 
## Coefficients:
##                                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                           1.7971136  0.1660656  10.822  < 2e-16 ***
## as.factor(area)US                     0.5641872  0.1990723   2.834   0.0046 ** 
## yearsafter2004                        0.0608844  0.0144412   4.216 2.49e-05 ***
## gdp_per_capita_annual_growthrate_usd -0.0007685  0.0079038  -0.097   0.9225    
## as.factor(area)US:yearsafter2004      0.0304818  0.0174101   1.751   0.0800 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 275.236  on 35  degrees of freedom
## Residual deviance:  43.639  on 31  degrees of freedom
## AIC: 216.61
## 
## Number of Fisher Scoring iterations: 4
plot_lin_pois_new_od(mod.pois.new.od.5, "5", "Poisson")

#Estimate covariance matrix with Newey West
mod.pois.new.od.5.vcov <- vcovPL(mod.pois.new.od.5, cluster = ~ as.factor(combined_rd_ma_df$area), lag="NW1994")  

#Adjust standard errors based on estimated covariance matrix
mod.pois.new.od.nw.5 <- coeftest(mod.pois.new.od.5, vcov = mod.pois.new.od.5.vcov)
mod.pois.new.od.nw.5
## 
## z test of coefficients:
## 
##                                         Estimate  Std. Error z value  Pr(>|z|)
## (Intercept)                           1.79711364  0.17974494  9.9981 < 2.2e-16
## as.factor(area)US                     0.56418722  0.19477316  2.8966  0.003772
## yearsafter2004                        0.06088440  0.01473714  4.1314 3.606e-05
## gdp_per_capita_annual_growthrate_usd -0.00076852  0.01037691 -0.0741  0.940962
## as.factor(area)US:yearsafter2004      0.03048184  0.01482775  2.0557  0.039809
##                                         
## (Intercept)                          ***
## as.factor(area)US                    ** 
## yearsafter2004                       ***
## gdp_per_capita_annual_growthrate_usd    
## as.factor(area)US:yearsafter2004     *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod.pois.new.od.5.metrics <- get_new_od_model_metrics(mod.pois.new.od.5, "Poisson")
mod.pois.new.od.5.metrics
confint(mod.pois.new.od.nw.5)
##                                             2.5 %     97.5 %
## (Intercept)                           1.444820030 2.14940725
## as.factor(area)US                     0.182438843 0.94593560
## yearsafter2004                        0.032000135 0.08976867
## gdp_per_capita_annual_growthrate_usd -0.021106886 0.01956984
## as.factor(area)US:yearsafter2004      0.001419984 0.05954369
mod.pois.new.od.metrics <- rbind(mod.pois.new.od.1.metrics, mod.pois.new.od.2.metrics, mod.pois.new.od.3.metrics, mod.pois.new.od.4.metrics, mod.pois.new.od.5.metrics)
aic_values <- round(mod.pois.new.od.metrics$aic)
rmse_values <- round(mod.pois.new.od.metrics$rmse, 2)
loglik_values <- round(mod.pois.new.od.metrics$LogLik, 2)

#creating shorter names because stargazer has a length limit for the input of all models

m1 <- mod.pois.new.od.1
m2 <- mod.pois.new.od.2
m3 <- mod.pois.new.od.3
m4 <- mod.pois.new.od.4
m5 <- mod.pois.new.od.5 

stargazer(m1, m2, m3, m4, m5, title="Poisson Models of Annual Market Authorizations for New Orphan Drugs (Not adjusted for Error Structure)", align=TRUE, type = "html", out="Final Models Formatted/PoissonNEWorphandrugMAmodelsNonAdjusted.htm",
          covariate.labels = c("US", "Years After 2004", "Annual GDP Growth Rate Per Capita", "Years After 2004:US", "Intercept"),
          dep.var.labels=c("Annual Number of Market Authorizations for New Orphan Drugs"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
          model.numbers=FALSE,
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Log Likelihood", loglik_values)))
## 
## <table style="text-align:center"><caption><strong>Poisson Models of Annual Market Authorizations for New Orphan Drugs (Not adjusted for Error Structure)</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorizations for New Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>0.883<sup>***</sup></td><td>0.883<sup>***</sup></td><td>0.883<sup>***</sup></td><td>0.566<sup>***</sup></td><td>0.564<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.086)</td><td>(0.086)</td><td>(0.086)</td><td>(0.198)</td><td>(0.199)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>0.082<sup>***</sup></td><td>0.082<sup>***</sup></td><td>0.061<sup>***</sup></td><td>0.061<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.008)</td><td>(0.008)</td><td>(0.014)</td><td>(0.014)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.001</td><td></td><td>-0.001</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.008)</td><td></td><td>(0.008)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US</td><td></td><td></td><td></td><td>0.030<sup>*</sup></td><td>0.030<sup>*</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.017)</td><td>(0.017)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>2.362<sup>***</sup></td><td>1.574<sup>***</sup></td><td>1.572<sup>***</sup></td><td>1.794<sup>***</sup></td><td>1.797<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.072)</td><td>(0.111)</td><td>(0.113)</td><td>(0.162)</td><td>(0.166)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>326</td><td>216</td><td>218</td><td>215</td><td>217</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>9.78</td><td>4.7</td><td>4.72</td><td>4.59</td><td>4.57</td></tr>
## <tr><td style="text-align:left">Log Likelihood</td><td>-161.13</td><td>-104.84</td><td>-104.83</td><td>-103.31</td><td>-103.3</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#creating shorter names because stargazer has a length limit for the input of all models

m1 <- mod.pois.new.od.nw.1
m2 <- mod.pois.new.od.nw.2
m3 <- mod.pois.new.od.nw.3
m4 <- mod.pois.new.od.nw.4
m5 <- mod.pois.new.od.nw.5 

stargazer(m1,m2,m3,m4,m5, title="Newey-West Adjusted Poisson Models of Annual Market Authorizations for New Orphan Drugs ", align=TRUE, type = "html", out="Final Models Formatted/PoissonNEWorphandrugMAmodels.htm",
          covariate.labels = c("US", "Years After 2004", "Annual GDP Growth Rate Per Capita", "Years After 2004:US", "Intercept"),
          dep.var.labels=c("Annual Number of Market Authorizations for New Orphan Drugs"),
          column.labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5"),
          model.numbers=FALSE,
          omit.stat = c("all"),
          add.lines = list(c("AIC", aic_values), c("RMSE", rmse_values), c("Log Likelihood", loglik_values)))
## 
## <table style="text-align:center"><caption><strong>Newey-West Adjusted Poisson Models of Annual Market Authorizations for New Orphan Drugs</strong></caption>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="5"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="5" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="5">Annual Number of Market Authorizations for New Orphan Drugs</td></tr>
## <tr><td style="text-align:left"></td><td>Model 1</td><td>Model 2</td><td>Model 3</td><td>Model 4</td><td>Model 5</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">US</td><td>0.883<sup>***</sup></td><td>0.883<sup>***</sup></td><td>0.883<sup>***</sup></td><td>0.566<sup>***</sup></td><td>0.564<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.093)</td><td>(0.095)</td><td>(0.101)</td><td>(0.197)</td><td>(0.195)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004</td><td></td><td>0.082<sup>***</sup></td><td>0.082<sup>***</sup></td><td>0.061<sup>***</sup></td><td>0.061<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td>(0.007)</td><td>(0.007)</td><td>(0.014)</td><td>(0.015)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Annual GDP Growth Rate Per Capita</td><td></td><td></td><td>0.001</td><td></td><td>-0.001</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td>(0.012)</td><td></td><td>(0.010)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Years After 2004:US</td><td></td><td></td><td></td><td>0.030<sup>**</sup></td><td>0.030<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td>(0.015)</td><td>(0.015)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Intercept</td><td>2.362<sup>***</sup></td><td>1.574<sup>***</sup></td><td>1.572<sup>***</sup></td><td>1.794<sup>***</sup></td><td>1.797<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.145)</td><td>(0.110)</td><td>(0.103)</td><td>(0.181)</td><td>(0.180)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td><td></td><td></td><td></td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>326</td><td>216</td><td>218</td><td>215</td><td>217</td></tr>
## <tr><td style="text-align:left">RMSE</td><td>9.78</td><td>4.7</td><td>4.72</td><td>4.59</td><td>4.57</td></tr>
## <tr><td style="text-align:left">Log Likelihood</td><td>-161.13</td><td>-104.84</td><td>-104.83</td><td>-103.31</td><td>-103.3</td></tr>
## <tr><td colspan="6" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="5" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>